From 74c41d7f552e10b20a75b5797ac70487be48cc0f Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:40:01 +0800 Subject: [PATCH 01/61] finish merge remote master --- Makefile | 4 +- include/mxnet/io.h | 105 +++++++++++++++++++++++ src/io/inst_vector.h | 117 ++++++++++++++++++++++++++ src/io/io.cc | 60 +++++++++++++ src/io/iter_mnist-inl.h | 181 ++++++++++++++++++++++++++++++++++++++++ test/io_mnist_test.cc | 96 +++++++++++++++++++++ 6 files changed, 562 insertions(+), 1 deletion(-) create mode 100644 include/mxnet/io.h create mode 100644 src/io/inst_vector.h create mode 100644 src/io/io.cc create mode 100644 src/io/iter_mnist-inl.h create mode 100644 test/io_mnist_test.cc diff --git a/Makefile b/Makefile index fe906dcd1a1e..ee1dce160332 100644 --- a/Makefile +++ b/Makefile @@ -57,7 +57,7 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o +OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o io.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -94,11 +94,13 @@ pooling_cpu.o: src/operator/pooling.cc pooling_gpu.o: src/operator/pooling.cu softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu +io.o: src/io/io.cc lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) test/test_storage: test/test_storage.cc lib/libmxnet.a +test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/include/mxnet/io.h b/include/mxnet/io.h new file mode 100644 index 000000000000..29dccbace770 --- 
/dev/null +++ b/include/mxnet/io.h @@ -0,0 +1,105 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file io.h + * \brief mxnet io data structure and data iterator + */ +#ifndef MXNET_IO_H_ +#define MXNET_IO_H_ +#include +#include +#include +#include +#include "./base.h" + +namespace mxnet { +/*! + * \brief iterator type + * \tparam DType data type + */ +template +class IIterator : public dmlc::DataIter { + public: + /*! + * \brief set the parameter + * \param name name of parameter + * \param val value of parameter + */ + virtual void SetParam(const char *name, const char *val) = 0; + /*! \brief initalize the iterator so that we can use the iterator */ + virtual void Init(void) = 0; + /*! \brief set before first of the item */ + virtual void BeforeFirst(void) = 0; + /*! \brief move to next item */ + virtual bool Next(void) = 0; + /*! \brief get current data */ + virtual const DType &Value(void) const = 0; + /*! \brief virtual destructor */ + virtual ~IIterator(void) {} + /*! \brief store the name of each data, it could be used for making NArrays */ + std::vector data_names; + /*! \brief append one name to data_names */ + inline void SetDataName(const std::string data_name){ + data_names.push_back(data_name); + } +}; // class IIterator + +/*! \brief a single data instance */ +struct DataInst { + /*! \brief unique id for instance */ + unsigned index; + /*! \brief content of data */ + std::vector data; + /*! \brief extra data to be fed to the network */ + std::string extra_data; +}; // struct DataInst + +/*! + * \brief a standard batch of data commonly used by iterator + * a databatch contains multiple TBlobs. Each TBlob has + * a name (kept in IIterator::data_names). There is no difference between + * data and label; how they are used is up to the DNN implementation. + */ +struct DataBatch { + public: + /*! \brief unique id for instance, can be NULL, sometimes is useful */ + unsigned *inst_index; + /*! \brief number of instances */ + mshadow::index_t batch_size; + /*! 
\brief number of padding elements in this batch, + this is used to indicate the last elements in the batch are only padded up to match the batch, and should be discarded */ + mshadow::index_t num_batch_padd; + public: + /*! \brief content of dense data, if this DataBatch is dense */ + std::vector data; + /*! \brief extra data to be fed to the network */ + std::string extra_data; + public: + /*! \brief constructor: no instance ids, zero batch size and zero padding */ + DataBatch(void) { + inst_index = NULL; + batch_size = 0; num_batch_padd = 0; + } + /*! \brief give names to the data */ + void Naming(std::vector names); +}; // struct DataBatch + +/*! + * \brief create the databatch iterator IIterator + * \param cfg configuration settings as key=value pairs + * \return the data IIterator ptr + */ +IIterator *CreateIterator(const std::vector > &cfg); +/*! + * \brief create the databatch iterator IIterator from config file + * \param cfg_path configuration file path + * \return the data IIterator ptr + */ +IIterator *CreateIteratorFromConfig(const char* cfg_path); +/*! + * \brief create the databatch iterator IIterator by iter name + * \param iter_name iterator name (io.cc in this patch only accepts mnist) + * \return the data IIterator ptr + */ +IIterator *CreateIteratorByName(const char* iter_name); +} // namespace mxnet +#endif // MXNET_IO_H_ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h new file mode 100644 index 000000000000..1ae734631680 --- /dev/null +++ b/src/io/inst_vector.h @@ -0,0 +1,117 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file inst_vector.h + * \brief holder of a sequence of DataInst in CPU + * that are not necessarily of same shape + */ +#ifndef MXNET_IO_INST_VECTOR_H_ +#define MXNET_IO_INST_VECTOR_H_ +#include +#include +#include +#include +#include "./data.h" +namespace mxnet { +/*! 
+ * \brief tensor vector that can store sequence of tensor + * in a memory compact way, tensors do not have to be of same shape + */ +template +class TensorVector { + public: + TensorVector(void) { + this->Clear(); + } + // get i-th tensor + inline mshadow::Tensor + operator[](size_t i) const { + CHECK(i < shape_.size()); /* offset_ holds Size() + 1 entries; unlike i + 1 < offset_.size() this also rejects the wrapped index Back() yields when empty */ + CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); + return mshadow::Tensor + (reinterpret_cast(BeginPtr(content_)) + offset_[i], shape_[i]); + } + inline mshadow::Tensor Back() const { /* last tensor; trips the bounds CHECK in operator[] when the vector is empty */ + return (*this)[Size() - 1]; + } + inline size_t Size(void) const { + return shape_.size(); + } + // append storage for a tensor of the given shape + // returns nothing; read the new tensor through Back() + inline void Push(mshadow::Shape shape) { + shape_.push_back(shape); + offset_.push_back(offset_.back() + shape.Size()); + content_.resize(offset_.back()); + } + inline void Clear(void) { + offset_.clear(); + offset_.push_back(0); + content_.clear(); + shape_.clear(); + } + + private: + // offset_[i] is where tensor i starts in content_ (Size() + 1 entries) + std::vector offset_; + // data content + std::vector content_; + // shape of data + std::vector > shape_; +}; + +/*! + * \brief tblob vector that can store sequence of tblob + * in a memory compact way, tblobs do not have to be of same shape + */ +template +class TBlobVector { + public: + TBlobVector(void) { + this->Clear(); + } + // get i-th tblob + inline TBlob operator[](size_t i) const; + // get the last tblob + inline TBlob Back(); + // return the size of the vector + inline size_t Size(void) const; + // push a tblob of certain shape + // declared void: nothing is returned + inline void Push(TShape shape_); + inline void Clear(void); + private: + // offset of the data content + std::vector offset_; + // data content + std::vector content_; + // shape of data + std::vector shape_; +}; + +/*! 
+ * \brief instance vector that can hold + * non-uniform shape data instances in a shape efficient way + */ +class InstVector { + public: + inline size_t Size(void) const { + return index_.size(); + } + // get the i-th instance + inline DataInst operator[](size_t i) const; + // get back of instance vector + inline DataInst Back() const; + // clear the container + inline void Clear(void); + // push the newly coming instance + inline void Push(unsigned index, TBlob data_); + + private: + /*! \brief index of the data */ + std::vector index_; + // data + std::vector > data_; + // extra data + std::vector extra_data_; +}; /* NOTE(review): namespace mxnet, opened at the top of this header, is not closed before the #endif below */ +#endif // MXNET_IO_INST_VECTOR_H_ diff --git a/src/io/io.cc b/src/io/io.cc new file mode 100644 index 000000000000..2df16e4fc209 --- /dev/null +++ b/src/io/io.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2015 by Contributors +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE + +#include +#include +#include +#include +#include +#include +#include +#include "iter_mnist-inl.h" +#include "../utils/random.h" + +namespace mxnet { + IIterator *CreateIterator( + const std::vector< std::pair > &cfg) { + size_t i = 0; + IIterator *it = NULL; + for (; i < cfg.size(); ++i) { + const char *name = cfg[i].first.c_str(); + const char *val = cfg[i].second.c_str(); + if (!strcmp(name, "iter")) { + if (!strcmp(val, "mnist")) { + CHECK(it == NULL) << "mnist cannot chain over other iterator"; + it = new MNISTIterator(); continue; + } + CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; + } + if (it != NULL) { /* entries listed before iter=... are skipped: no iterator exists yet to receive them */ + it->SetParam(name, val); + } + } + CHECK(it != NULL) << "must specify iterator by iter=itername"; + return it; + } + + IIterator *CreateIteratorFromConfig(const char* cfg_path) { + std::ifstream ifs(cfg_path, std::ifstream::in); + std::vector< std::pair< std::string, std::string> > itcfg; + dmlc::Config cfg(ifs); + for (dmlc::Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { + dmlc::Config::ConfigEntry ent = *iter; + 
itcfg.push_back(std::make_pair(ent.first, ent.second)); + } + // Get the data and init + return CreateIterator(itcfg); + } + + IIterator *CreateIteratorByName(const char* iter_name) { + IIterator *it = NULL; + // Currently only support mnist + if (!strcmp(iter_name, "mnist")) { + CHECK(it == NULL) << "mnist cannot chain over other iterator"; + it = new MNISTIterator(); + } + CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; + return it; + } +} // namespace mxnet diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h new file mode 100644 index 000000000000..376838fcf3f0 --- /dev/null +++ b/src/io/iter_mnist-inl.h @@ -0,0 +1,181 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file iter_mnist-inl.h + * \brief iterator that takes mnist dataset + */ +#ifndef MXNET_IO_ITER_MNIST_INL_H_ +#define MXNET_IO_ITER_MNIST_INL_H_ +#include +#include +#include +#include +#include +#include +#include +#include "../utils/random.h" + +namespace mxnet { +class MNISTIterator: public IIterator { + public: + MNISTIterator(void) { + img_.dptr_ = NULL; + mode_ = 1; + inst_offset_ = 0; + silent_ = 0; + shuffle_ = 0; + rnd.Seed(kRandMagic); + out_.data.resize(2); + } + virtual ~MNISTIterator(void) { + if (img_.dptr_ != NULL) delete []img_.dptr_; + } + virtual void SetParam(const char *name, const char *val) { + if (!strcmp(name, "silent")) silent_ = atoi(val); + if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); + if (!strcmp(name, "input_flat")) mode_ = atoi(val); + if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); + if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "path_label")) path_label = val; + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + } + // intialize iterator loads data in + virtual void Init(void) { + this->LoadImage(); + this->LoadLabel(); + // set name + 
this->SetDataName(std::string("data")); + this->SetDataName(std::string("label")); + if (mode_ == 1) { + batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, 1, img_.size(1) * img_.size(2)); + } else { + batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, img_.size(1), img_.size(2)); + } + out_.inst_index = NULL; + batch_label_.shape_ = mshadow::Shape2(batch_size_, 1); + batch_label_.stride_ = 1; + batch_data_.stride_ = batch_data_.size(3); + out_.batch_size = batch_size_; + if (shuffle_) this->Shuffle(); + if (silent_ == 0) { + mshadow::Shape<4> s = batch_data_.shape_; + printf("MNISTIterator: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", + (unsigned)img_.size(0), shuffle_, s[0], s[1], s[2], s[3]); + } + } + virtual void BeforeFirst(void) { + this->loc_ = 0; + } + virtual bool Next(void) { + if (loc_ + batch_size_ <= img_.size(0)) { + batch_data_.dptr_ = img_[loc_].dptr_; + batch_label_.dptr_ = &labels_[loc_]; + out_.data[0] = TBlob(batch_data_); + out_.data[1] = TBlob(batch_label_); + out_.inst_index = &inst_[loc_]; + loc_ += batch_size_; + return true; + } else { + return false; + } + } + virtual const DataBatch &Value(void) const { + return out_; + } + + private: + inline void LoadImage(void) { + dmlc::Stream *stdimg = dmlc::Stream::Create(path_img.c_str(), "r"); + ReadInt(stdimg); + int image_count = ReadInt(stdimg); + int image_rows = ReadInt(stdimg); + int image_cols = ReadInt(stdimg); + + img_.shape_ = mshadow::Shape3(image_count, image_rows, image_cols); + img_.stride_ = img_.size(2); + + // allocate continuous memory + img_.dptr_ = new float[img_.MSize()]; + for (int i = 0; i < image_count; ++i) { + for (int j = 0; j < image_rows; ++j) { + for (int k = 0; k < image_cols; ++k) { + unsigned char ch; + CHECK(stdimg->Read(&ch, sizeof(ch)) != 0); /* check the byte count Read() returns; the comparison used to sit inside the size argument */ + img_[i][j][k] = ch; + } + } + } + // normalize to 0-1 + img_ *= 1.0f / 256.0f; + delete stdimg; + } + inline void LoadLabel(void) { + dmlc::Stream *stdlabel = dmlc::Stream::Create(path_label.c_str(), "r"); + 
ReadInt(stdlabel); + int labels_count = ReadInt(stdlabel); + labels_.resize(labels_count); + for (int i = 0; i < labels_count; ++i) { + unsigned char ch; + CHECK(stdlabel->Read(&ch, sizeof(ch)) != 0); /* one byte per label; check the byte count Read() returns, as ReadInt does */ + labels_[i] = ch; + inst_.push_back((unsigned)i + inst_offset_); + } + delete stdlabel; + } + inline void Shuffle(void) { + rnd.Shuffle(&inst_); + std::vector tmplabel(labels_.size()); + mshadow::TensorContainer tmpimg(img_.shape_); + for (size_t i = 0; i < inst_.size(); ++i) { + unsigned ridx = inst_[i] - inst_offset_; + mshadow::Copy(tmpimg[i], img_[ridx]); + tmplabel[i] = labels_[ridx]; + } + // copy back + mshadow::Copy(img_, tmpimg); + labels_ = tmplabel; + } + + private: + inline static int ReadInt(dmlc::Stream *fi) { + unsigned char buf[4]; + CHECK(fi->Read(buf, sizeof(buf)) == sizeof(buf)) + << "invalid mnist format"; + return reinterpret_cast(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); + } + + private: + /*! \brief silent */ + int silent_; + /*! \brief path */ + std::string path_img, path_label; + /*! \brief output */ + DataBatch out_; + /*! \brief whether do shuffle */ + int shuffle_; + /*! \brief data mode */ + int mode_; + /*! \brief current location */ + index_t loc_; + /*! \brief batch size */ + index_t batch_size_; + /*! \brief image content */ + mshadow::Tensor img_; + /*! \brief label content */ + std::vector labels_; + /*! \brief batch data tensor */ + mshadow::Tensor batch_data_; + /*! \brief batch label tensor */ + mshadow::Tensor batch_label_; + /*! \brief instance index offset */ + unsigned inst_offset_; + /*! 
\brief instance index */ + std::vector inst_; + // random sampler + utils::RandomSampler rnd; + // magic number to setup randomness + static const int kRandMagic = 0; +}; // class MNISTIterator +} // namespace mxnet +#endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/test/io_mnist_test.cc b/test/io_mnist_test.cc new file mode 100644 index 000000000000..2bfba24a507a --- /dev/null +++ b/test/io_mnist_test.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2015 by Contributors +// IO test code + +#include +#include +#include +#include +#include +#include "mxnet/io.h" +#include "../src/io/iter_mnist-inl.h" + +using namespace std; +using namespace mxnet; +using namespace dmlc; + +void InitIter(IIterator* itr, + const std::vector< std::pair< std::string, std::string> > &defcfg) { + for (size_t i = 0; i < defcfg.size(); ++i) { + itr->SetParam(defcfg[i].first.c_str(), defcfg[i].second.c_str()); + } + itr->Init(); +} + +IIterator* CreateIterators( + const std::vector< std::pair< std::string, std::string> >& cfg) { + IIterator* data_itr = NULL; + int flag = 0; /* section state: 0 outside any section, 1 data, 2 eval, 3 pred */ + std::string evname; /* not referenced anywhere in this function */ + std::vector< std::pair< std::string, std::string> > itcfg; + std::vector< std::pair< std::string, std::string> > defcfg; + for (size_t i = 0; i < cfg.size(); ++i) { + const char *name = cfg[i].first.c_str(); + const char *val = cfg[i].second.c_str(); + if (!strcmp(name, "data")) { + flag = 1; continue; + } + if (!strcmp(name, "eval")) { + flag = 2; continue; + } + if (!strcmp(name, "pred")) { + flag = 3; continue; + } + if (!strcmp(name, "iterend") && !strcmp(val, "true")) { + if (flag == 1) { /* only a data section builds an iterator; eval and pred sections are discarded */ + data_itr = mxnet::CreateIterator(itcfg); + } + flag = 0; itcfg.clear(); + } + if (flag == 0) { /* also stores the iterend entry itself, since flag was reset just above */ + defcfg.push_back(cfg[i]); + } else { + itcfg.push_back(cfg[i]); + } + } + if (data_itr != NULL) { + InitIter(data_itr, defcfg); + } + return data_itr; +} + +/*! 
+ * Usage: ./io_mnist_test /path/to/io_config/file + * Example + * data = train + * iter = mnist + * path_img = "./data/mnist/train-images-idx3-ubyte" + * path_label = "./data/mnist/train-labels-idx1-ubyte" + * shuffle = 1 + * iterend = true + * input_shape = 1,1,784 (not a key the MNIST iterator reads) + * batch_size = 100 + * + */ + +int main(int argc, char** argv) { + std::ifstream ifs(argv[1], std::ifstream::in); /* NOTE(review): argc is never checked before argv[1] is used */ + std::vector< std::pair< std::string, std::string> > itcfg; + Config cfg(ifs); + for (Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { + Config::ConfigEntry ent = *iter; + itcfg.push_back(std::make_pair(ent.first, ent.second)); + } + // Get the data and init + IIterator* data_itr = CreateIterators(itcfg); /* NOTE(review): NULL unless a data section was parsed; the next line dereferences it unchecked */ + data_itr->BeforeFirst(); + int batch_dir = 0; + while (data_itr->Next()) { + std::cout << "Label of Batch " << batch_dir++ << std::endl; + // print label + DataBatch db = data_itr->Value(); + mshadow::Tensor label = db.data[1].get(); + for (size_t i = 0; i < label.shape_.shape_[0]; i++) + std::cout << label.dptr_[i] << " "; + std::cout << "\n"; + } +} From 63ff6d054daa9aeb4ef2a974aaa9b22d37dd3f07 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sat, 22 Aug 2015 13:05:57 +0800 Subject: [PATCH 02/61] built in python, start polishing new feature required --- Makefile | 4 +-- include/mxnet/c_api.h | 23 ++++++++++++ python/mxnet/base.py | 2 +- python/mxnet/io.py | 84 +++++++++++++++++++++++++++++++++++++++++++ python/test_io.py | 22 ++++++++++++ src/c_api.cc | 50 ++++++++++++++++++++++++++ 6 files changed, 182 insertions(+), 3 deletions(-) create mode 100644 python/mxnet/io.py create mode 100644 python/test_io.py diff --git a/Makefile b/Makefile index ee1dce160332..fbdd683d9792 100644 --- a/Makefile +++ b/Makefile @@ -96,8 +96,8 @@ softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu io.o: src/io/io.cc -lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) -lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) +lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) 
$(LIB_DEP) +lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 1178af9db5fd..1297e48fbac8 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -447,6 +447,29 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, */ MXNET_DLL int MXIOCreateFromConfig(const char *cfg, DataIterHandle *out); +/*! + * \brief create an data iterator by name + * \param iter_name iterator name + * \param out the handle to the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOCreateByName(const char *iter_name, + DataIterHandle *out); +/*! + * \brief set parameter value + * \param handle the handle to iterator + * \param name parameter name + * \param val parameter value + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOSetParam(DataIterHandle handle, + const char *name, const char *val); +/*! + * \brief Init after set parameter + * \param handle the handle to iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOInit(DataIterHandle handle); /*! 
* \brief move iterator to next position * \param handle the handle to iterator diff --git a/python/mxnet/base.py b/python/mxnet/base.py index c514d6939988..9d5026f126cf 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -70,7 +70,7 @@ def _load_lib(): SymbolCreatorHandle = ctypes.c_void_p SymbolHandle = ctypes.c_void_p ExecutorHandle = ctypes.c_void_p - +DataIterHandle = ctypes.c_void_p #---------------------------- # helper function definition #---------------------------- diff --git a/python/mxnet/io.py b/python/mxnet/io.py new file mode 100644 index 000000000000..96e4938a79b3 --- /dev/null +++ b/python/mxnet/io.py @@ -0,0 +1,84 @@ +# coding: utf-8 + +"""DataIter interface of mxnet""" +from __future__ import absolute_import + +import ctypes +from .base import _LIB +from .base import DataIterHandle, NArrayHandle +from .base import check_call +from .narray import NArray + +class DataIter(object): + """DataIter object in mxnet + + DataIter is a wrapper for C++ DataIter functions + """ + + def __init__(self): + """initialize a new dataiter + + """ + self._datahandle = None + + def createfromcfg(self, cfg_path): + """create a dataiter from config file + + cfg_path is the path of the configuration file + """ + hdl = DataIterHandle() + check_call(_LIB.MXIOCreateFromConfig(ctypes.c_char_p(cfg_path), ctypes.byref(hdl))) + self._datahandle = hdl + + def createbyname(self, iter_name): + """create a dataiter by the name + + iter_name is the iterator name, e.g. mnist + """ + hdl = DataIterHandle() + check_call(_LIB.MXIOCreateByName(ctypes.c_char_p(iter_name), ctypes.byref(hdl))) + self._datahandle = hdl + + def setparam(self, name, val): + """set param value for dataiter + + name parameter name + val parameter value + """ + check_call(_LIB.MXIOSetParam(self._datahandle, ctypes.c_char_p(name), ctypes.c_char_p(val))) + + def init(self): + """init dataiter + + """ + check_call(_LIB.MXIOInit(self._datahandle)) + + def beforefirst(self): + """reset the iterator to before the first batch + + """ + 
check_call(_LIB.MXIOBeforeFirst(self._datahandle)) + + def next(self): + """move to the next batch and return the flag written by MXIONext + + """ + next_res = ctypes.c_int(0) + check_call(_LIB.MXIONext(self._datahandle, ctypes.byref(next_res))) + return next_res.value + + def getdata(self): + """get data from batch + + """ + hdl = NArrayHandle() + check_call(_LIB.MXIOGetData(self._datahandle, ctypes.byref(hdl))) + return NArray(hdl) + + def getlabel(self): + """get label from batch + + """ + hdl = NArrayHandle() + check_call(_LIB.MXIOGetLabel(self._datahandle, ctypes.byref(hdl))) + return NArray(hdl) diff --git a/python/test_io.py b/python/test_io.py new file mode 100644 index 000000000000..6909176d11c2 --- /dev/null +++ b/python/test_io.py @@ -0,0 +1,22 @@ +#pylint: skip-file +import mxnet as mx + +dataiter = mx.io.DataIter() +#dataiter.createfromcfg('/home/tianjun/mxnet/mxnet/MNIST.conf') +dataiter.createbyname('mnist') +dataiter.setparam('path_img', "/home/tianjun/data/mnist/train-images-idx3-ubyte") +dataiter.setparam('path_label', "/home/tianjun/data/mnist/train-labels-idx1-ubyte") +dataiter.setparam('shuffle', '1') +dataiter.setparam('seed_data', '2') +dataiter.setparam('batch_size', '100') + +dataiter.init() + +dataiter.beforefirst() + +for i in range(100): + dataiter.next() + info = "Batch %d" % (i) + print info + label = dataiter.getdata() # NOTE(review): variable is named label but getdata() is called + print label.numpy diff --git a/src/c_api.cc b/src/c_api.cc index a5ed648469e1..5286551a3c1d 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -597,3 +598,52 @@ int MXExecutorBind(SymbolHandle symbol_handle, API_END(); } +int MXIOCreateFromConfig(const char *cfg, DataIterHandle *out) { + API_BEGIN(); + *out = static_cast(CreateIteratorFromConfig(cfg)); + API_END(); +} + +int MXIOCreateByName(const char *iter_name, DataIterHandle *out) { + API_BEGIN(); + *out = static_cast(CreateIteratorByName(iter_name)); + API_END(); +} + +int MXIOSetParam(DataIterHandle handle, const char *name, const char *val) 
{ + API_BEGIN(); + static_cast* >(handle)->SetParam(name, val); + API_END(); +} + +int MXIOInit(DataIterHandle handle) { + API_BEGIN(); + static_cast* >(handle)->Init(); + API_END(); +} + +int MXIOBeforeFirst(DataIterHandle handle) { + API_BEGIN(); + static_cast* >(handle)->BeforeFirst(); + API_END(); +} + +int MXIONext(DataIterHandle handle, int *out) { + API_BEGIN(); + *out = static_cast* >(handle)->Next(); + API_END(); +} + +int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out) { + API_BEGIN(); + DataBatch db = static_cast* >(handle)->Value(); /* by-value copy of the batch that Value() returns by const reference */ + *out = new NArray(db.data[1], 0); /* data[1] is the label blob set by MNISTIterator::Next */ + API_END(); +} + +int MXIOGetData(DataIterHandle handle, NArrayHandle *out) { + API_BEGIN(); + DataBatch db = static_cast* >(handle)->Value(); /* by-value copy of the batch that Value() returns by const reference */ + *out = new NArray(db.data[0], 0); + API_END(); +} From 065b061dea2bca07bf675a2acf361bc273a88e1b Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:28:47 +0800 Subject: [PATCH 03/61] finish old version registry in C --- Makefile | 5 ++- include/mxnet/c_api.h | 34 +++++++++++++++++ include/mxnet/io.h | 5 +++ python/mxnet/base.py | 1 + src/c_api.cc | 39 +++++++++++++++++++ src/io/io.cc | 4 +- src/io/iter_mnist-inl.h | 83 ++++++++++++++++++++++++----------------- src/io/iter_mnist.cc | 17 +++++++++ 8 files changed, 149 insertions(+), 39 deletions(-) create mode 100644 src/io/iter_mnist.cc diff --git a/Makefile b/Makefile index fbdd683d9792..09b6c523f179 100644 --- a/Makefile +++ b/Makefile @@ -57,7 +57,7 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o io.o +OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o io.o iter_mnist.o 
CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -95,12 +95,13 @@ pooling_gpu.o: src/operator/pooling.cu softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu io.o: src/io/io.cc +iter_mnist.o: src/io/iter_mnist.cc lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a -test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a +#test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 1297e48fbac8..bafb06cd28e8 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -36,6 +36,8 @@ typedef void *SymbolHandle; typedef void *AtomicSymbolHandle; /*! \brief handle to an Executor */ typedef void *ExecutorHandle; +/*! \brief handle to a dataiter creator */ +typedef void *DataIterCreator; /*! \brief handle to a DataIterator */ typedef void *DataIterHandle; /*! @@ -506,5 +508,37 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, */ MXNET_DLL int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out); +/*! + * \brief list all the available iterator entries + * \param out_size the number of returned iterator entries + * \param out_array the output iterator entries + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array); +/*! + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOIterGetName(DataIterCreator iter, + const char **out_name); +/*! 
+ * \brief create an iterator, init with parameters + * (num_param is the length of the keys and vals arrays) + * \param creator IOIterator entry + * \param num_param number of parameters + * \param keys parameter keys + * \param vals parameter values + * \param out pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); + #endif // MXNET_C_API_H_ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 29dccbace770..16c86138abe1 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -25,6 +25,11 @@ class IIterator : public dmlc::DataIter { * \param val value of parameter */ virtual void SetParam(const char *name, const char *val) = 0; + /*! + * \brief initialize the parameters + * \param kwargs key-value pairs + */ + virtual void InitParams(const std::vector >& kwargs) = 0; /*! \brief initalize the iterator so that we can use the iterator */ virtual void Init(void) = 0; /*! 
\brief set before first of the item */ diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 9d5026f126cf..63f72b87ad59 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -70,6 +70,7 @@ def _load_lib(): SymbolCreatorHandle = ctypes.c_void_p SymbolHandle = ctypes.c_void_p ExecutorHandle = ctypes.c_void_p +DataIterCreatorHandle = ctypes.c_void_p DataIterHandle = ctypes.c_void_p #---------------------------- # helper function definition diff --git a/src/c_api.cc b/src/c_api.cc index 5286551a3c1d..6bdbc19b72ee 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -647,3 +648,41 @@ int MXIOGetData(DataIterHandle handle, NArrayHandle *out) { *out = new NArray(db.data[0], 0); API_END(); } + +int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array) { + API_BEGIN(); + auto &vec = Registry::List(); + *out_size = static_cast(vec.size()); + *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) + API_END(); +} + +int MXIOIterGetName(DataIterCreator iter, + const char **out_name) { + API_BEGIN(); + auto *f = static_cast(iter); + *out_name = f->name.c_str(); + API_END(); +} + +int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out) { + IOIteratorEntry *e = static_cast(creator); + IIterator *iter = (*e)(); /* NOTE(review): runs before API_BEGIN(), so presumably an exception thrown by the creator is not turned into an error return - confirm against the macro */ + API_BEGIN(); + std::vector > kwargs; + for (int i = 0; i < num_param; ++i) { + kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); + } + iter->InitParams(kwargs); + *out = iter; + API_END_HANDLE_ERROR(delete iter); +} + + + + diff --git a/src/io/io.cc b/src/io/io.cc index 2df16e4fc209..60c013a812a5 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -23,7 +23,7 @@ namespace mxnet { if (!strcmp(name, "iter")) { if (!strcmp(val, "mnist")) { CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new MNISTIterator(); continue; + it = new io::MNISTIterator(); 
continue; } CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; } @@ -52,7 +52,7 @@ namespace mxnet { // Currently only support mnist if (!strcmp(iter_name, "mnist")) { CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new MNISTIterator(); + it = new io::MNISTIterator(); } CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; return it; diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 376838fcf3f0..62168f8f1811 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -10,19 +10,42 @@ #include #include #include +#include #include #include #include "../utils/random.h" namespace mxnet { +namespace io { +// Define mnist io parameters +struct MNISTParam : public dmlc::Parameter { + /*! \brief path */ + std::string path_img, path_label; + /*! \brief whether to do shuffle */ + bool shuffle; + /*! \brief whether to print info */ + bool silent; + /*! \brief batch size */ + int batch_size; + /*! \brief data mode */ + int input_flat; + // declare parameters in header file + DMLC_DECLARE_PARAMETER(Param) { + DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte"); + DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte"); + DMLC_DECLARE_FIELD(shuffle).set_default(false); + DMLC_DECLARE_FIELD(silent).set_default(false); + DMLC_DECLARE_FIELD(batch_size).set_default(128); + DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) + .add_enum("noflat", 0).set_default(1); + } +}; + class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; - mode_ = 1; inst_offset_ = 0; - silent_ = 0; - shuffle_ = 0; rnd.Seed(kRandMagic); out_.data.resize(2); } @@ -30,15 +53,9 @@ class MNISTIterator: public IIterator { if (img_.dptr_ != NULL) delete []img_.dptr_; } virtual void SetParam(const char *name, const char *val) { - if (!strcmp(name, "silent")) silent_ = atoi(val); - if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); - if 
(!strcmp(name, "input_flat")) mode_ = atoi(val); - if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); - if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "path_label")) path_label = val; - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + std::map kwargs; + kwargs[name] = val; + param.Init(kwargs); } // intialize iterator loads data in virtual void Init(void) { @@ -47,34 +64,34 @@ class MNISTIterator: public IIterator { // set name this->SetDataName(std::string("data")); this->SetDataName(std::string("label")); - if (mode_ == 1) { - batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, 1, img_.size(1) * img_.size(2)); + if (param.input_flat == 1) { + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, 1, img_.size(1) * img_.size(2)); } else { - batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, img_.size(1), img_.size(2)); + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, img_.size(1), img_.size(2)); } out_.inst_index = NULL; - batch_label_.shape_ = mshadow::Shape2(batch_size_, 1); + batch_label_.shape_ = mshadow::Shape2(param.batch_size, 1); batch_label_.stride_ = 1; batch_data_.stride_ = batch_data_.size(3); - out_.batch_size = batch_size_; - if (shuffle_) this->Shuffle(); - if (silent_ == 0) { + out_.batch_size = param.batch_size; + if (param.shuffle) this->Shuffle(); + if (param.silent == 0) { mshadow::Shape<4> s = batch_data_.shape_; printf("MNISTIterator: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", - (unsigned)img_.size(0), shuffle_, s[0], s[1], s[2], s[3]); + (unsigned)img_.size(0), param.shuffle, s[0], s[1], s[2], s[3]); } } virtual void BeforeFirst(void) { this->loc_ = 0; } virtual bool Next(void) { - if (loc_ + batch_size_ <= img_.size(0)) { + if (loc_ + param.batch_size <= img_.size(0)) { batch_data_.dptr_ = img_[loc_].dptr_; batch_label_.dptr_ = &labels_[loc_]; out_.data[0] = 
TBlob(batch_data_); out_.data[1] = TBlob(batch_label_); out_.inst_index = &inst_[loc_]; - loc_ += batch_size_; + loc_ += param.batch_size; return true; } else { return false; @@ -83,10 +100,13 @@ class MNISTIterator: public IIterator { virtual const DataBatch &Value(void) const { return out_; } - + virtual void InitParams(const std::vector >& kwargs) { + std::map kmap(kwargs.begin(), kwargs.end()); + param.Init(kmap); + } private: inline void LoadImage(void) { - dmlc::Stream *stdimg = dmlc::Stream::Create(path_img.c_str(), "r"); + dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); ReadInt(stdimg); int image_count = ReadInt(stdimg); int image_rows = ReadInt(stdimg); @@ -111,7 +131,7 @@ class MNISTIterator: public IIterator { delete stdimg; } inline void LoadLabel(void) { - dmlc::Stream *stdlabel = dmlc::Stream::Create(path_label.c_str(), "r"); + dmlc::Stream *stdlabel = dmlc::Stream::Create(param.path_label.c_str(), "r"); ReadInt(stdlabel); int labels_count = ReadInt(stdlabel); labels_.resize(labels_count); @@ -146,20 +166,12 @@ class MNISTIterator: public IIterator { } private: - /*! \brief silent */ - int silent_; - /*! \brief path */ - std::string path_img, path_label; + /*! \brief MNIST iter params */ + MNISTParam param; /*! \brief output */ DataBatch out_; - /*! \brief whether do shuffle */ - int shuffle_; - /*! \brief data mode */ - int mode_; /*! \brief current location */ index_t loc_; - /*! \brief batch size */ - index_t batch_size_; /*! \brief image content */ mshadow::Tensor img_; /*! \brief label content */ @@ -177,5 +189,6 @@ class MNISTIterator: public IIterator { // magic number to setup randomness static const int kRandMagic = 0; }; // class MNISTIterator +} // namespace io } // namespace mxnet #endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc new file mode 100644 index 000000000000..942398749378 --- /dev/null +++ b/src/io/iter_mnist.cc @@ -0,0 +1,17 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file iter_mnist.cc + * \brief register mnist iterator + * \author Tianjun Xiao +*/ +#include +#include "./iter_mnist-inl.h" + +namespace mxnet { +namespace io { + +DMLC_REGISTER_PARAMETER(MNISTParam); +REGISTER_IO_ITER(mnist, MNISTIterator); + +} // namespace io +} // namespace mxnet From 03f05d49196d576272090a5691767cf7826066e4 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 23:21:49 +0800 Subject: [PATCH 04/61] modify to dmlc registry --- include/mxnet/io.h | 30 ++++++++++++++++++++++++++++++ python/mxnet/io.py | 25 ++++++++++++++++++++++--- src/c_api.cc | 13 +++++++++---- src/io/io.cc | 5 +++++ src/io/iter_mnist-inl.h | 20 +++++++++++++------- src/io/iter_mnist.cc | 5 +++-- 6 files changed, 82 insertions(+), 16 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 16c86138abe1..600978023b5b 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -6,6 +6,7 @@ #ifndef MXNET_IO_H_ #define MXNET_IO_H_ #include +#include #include #include #include @@ -106,5 +107,34 @@ IIterator *CreateIteratorFromConfig(const char* cfg_path); * \return the data IIterator ptr */ IIterator *CreateIteratorByName(const char* iter_name); + +/*! \brief typedef the factory function of data iterator */ +typedef IIterator *(*DataIteratorFactory)(); +/*! + * \brief Registry entry for DataIterator factory functions. + */ +struct DataIteratorReg + : public dmlc::FunctionRegEntryBase { +}; +//-------------------------------------------------------------- +// The following part are API Registration of Iterators +//-------------------------------------------------------------- +/*! 
+ * \brief Macro to register Iterators + * + * \code + * // example of registering a mnist iterator + * REGISTER_IO_ITERATOR(MNIST, MNISTIterator) + * .describe("Mnist data iterator"); + * + * \endcode + */ +#define MXNET_REGISTER_IO_ITER(name, DataIteratorType) \ + static ::mxnet::IIterator* __create__ ## DataIteratorType ## __() { \ + return new DataIteratorType; \ + } \ + DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ + .set_body(__create__ ## DataIteratorType ## __) } // namespace mxnet #endif // MXNET_IO_H_ diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 96e4938a79b3..ead49f07c4fd 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -5,16 +5,35 @@ import ctypes from .base import _LIB +from .base import c_array, c_str, mx_uint, string_types from .base import DataIterHandle, NArrayHandle from .base import check_call from .narray import NArray class DataIter(object): - """DataIter object in mxnet + """DataIter object in mxnet. List all the needed functions here. 
""" - DataIter is a wrapper for C++ DataIter functions - """ + def __init__(self, handle): + """Initialize with handle + Parameters + ---------- + handle : DataIterHandle + the handle to the underlying C++ Data Iterator + """ + self.handle = handle + + def __del__(self): + check_call(_LIB.MXDataIterFree(self.handle)) + + + + + + + + + def __init__(self): """initialize a new dataiter diff --git a/src/c_api.cc b/src/c_api.cc index 6bdbc19b72ee..f1ba4abb6233 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -652,7 +652,7 @@ int MXIOGetData(DataIterHandle handle, NArrayHandle *out) { int MXListIOIters(mx_uint *out_size, DataIterCreator **out_array) { API_BEGIN(); - auto &vec = Registry::List(); + auto &vec = dmlc::Registry::List(); *out_size = static_cast(vec.size()); *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) API_END(); @@ -661,7 +661,7 @@ int MXListIOIters(mx_uint *out_size, int MXIOIterGetName(DataIterCreator iter, const char **out_name) { API_BEGIN(); - auto *f = static_cast(iter); + auto *f = static_cast(iter); *out_name = f->name.c_str(); API_END(); } @@ -671,8 +671,8 @@ int MXCreateIOIterator(DataIterCreator creator, const char **keys, const char **vals, DataIterHandle *out) { - IOIteratorEntry *e = static_cast(creator); - IIterator *iter = (*e)(); + DataIteratorReg *e = static_cast(creator); + IIterator *iter = e->body(); API_BEGIN(); std::vector > kwargs; for (int i = 0; i < num_param; ++i) { @@ -683,6 +683,11 @@ int MXCreateIOIterator(DataIterCreator creator, API_END_HANDLE_ERROR(delete iter); } +int MXDataIterFree(DataIterHandle iter) { + API_BEGIN(); + delete static_cast *>(symbol); + API_END(); +} diff --git a/src/io/io.cc b/src/io/io.cc index 60c013a812a5..aafe85073a52 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,10 @@ #include "iter_mnist-inl.h" #include "../utils/random.h" +namespace dmlc { +DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); 
+} // namespace dmlc + namespace mxnet { IIterator *CreateIterator( const std::vector< std::pair > &cfg) { diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 62168f8f1811..6a705c483e5b 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -30,14 +30,20 @@ struct MNISTParam : public dmlc::Parameter { /*! \brief data mode */ int input_flat; // declare parameters in header file - DMLC_DECLARE_PARAMETER(Param) { - DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte"); - DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte"); - DMLC_DECLARE_FIELD(shuffle).set_default(false); - DMLC_DECLARE_FIELD(silent).set_default(false); - DMLC_DECLARE_FIELD(batch_size).set_default(128); + DMLC_DECLARE_PARAMETER(MNISTParam) { + DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") + .describe("Mnist image path."); + DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") + .describe("Mnist label path."); + DMLC_DECLARE_FIELD(shuffle).set_default(false) + .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to print out data info."); + DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) + .describe("Batch Size."); DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) - .add_enum("noflat", 0).set_default(1); + .add_enum("noflat", 0).set_default(1) + .describe("Whether to flat the data into 1D."); } }; diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 942398749378..c6fab8d376d7 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,14 +4,15 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include #include "./iter_mnist-inl.h" namespace mxnet { namespace io { DMLC_REGISTER_PARAMETER(MNISTParam); -REGISTER_IO_ITER(mnist, MNISTIterator); +MXNET_REGISTER_IO_ITER(MNIST, MNISTIterator) + .describe("Create MNISTIterator") + .add_arguments(MNISTParam::__FIELDS__()); } // namespace io } // 
namespace mxnet From 7144cb4a4c31ba3c71421359f66eb683ccf072b7 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 03:20:11 +0800 Subject: [PATCH 05/61] pass python mnist test, begin cleaning --- include/mxnet/c_api.h | 110 +++++++++++--------------- include/mxnet/io.h | 4 +- python/mxnet/__init__.py | 1 + python/mxnet/io.py | 166 ++++++++++++++++++++++++++++----------- python/test_io.py | 27 +++---- python/test_mnist.py | 79 ++++++------------- src/c_api.cc | 99 +++++++++++------------ src/io/iter_mnist-inl.h | 3 +- src/io/iter_mnist.cc | 2 +- 9 files changed, 256 insertions(+), 235 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index bafb06cd28e8..0949543b33e8 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -441,64 +441,82 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, // Part 5: IO Interface //-------------------------------------------- /*! - * \brief create an data iterator from configs string - * \param cfg config string that contains the - * configuration about the iterator - * \param out the handle to the iterator + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateFromConfig(const char *cfg, - DataIterHandle *out); +MXNET_DLL int MXListDataIters(mx_uint *out_size, + DataIterCreator **out_array); /*! - * \brief create an data iterator by name - * \param iter_name iterator name - * \param out the handle to the iterator + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateByName(const char *iter_name, - DataIterHandle *out); +MXNET_DLL int MXDataIterGetName(DataIterCreator iter, + const char **out_name); /*! 
- * \brief set parameter value - * \param handle the handle to iterator - * \param name parameter name - * \param val parameter value + * \brief init an iterator, init with parameters + * the array size of passed in arguments + * \param handle of the iterator creator + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out resulting iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOSetParam(DataIterHandle handle, - const char *name, const char *val); +MXNET_DLL int MXDataIterCreateIter(DataIterCreator handle, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); /*! - * \brief Init after set parameter - * \param handle the handle to iterator + * \brief Get the detailed information about data iterator. + * \param creator the DataIterCreator. + * \param name The returned name of the creator. + * \param description The returned description of the symbol. + * \param num_args Number of arguments. + * \param arg_names Name of the arguments. + * \param arg_type_infos Type informations about the arguments. + * \param arg_descriptions Description information about the arguments. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOInit(DataIterHandle handle); +MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions); +/*! + * \brief free the handle to the IO module + * \param handle the handle pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! 
* \brief move iterator to next position * \param handle the handle to iterator * \param out return value of next * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIONext(DataIterHandle handle, +MXNET_DLL int MXDataIterNext(DataIterHandle handle, int *out); /*! * \brief call iterator.BeforeFirst * \param handle the handle to iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOBeforeFirst(DataIterHandle handle); -/*! - * \brief free the handle to the IO module - * \param handle the handle pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOFree(DataIterHandle handle); +MXNET_DLL int MXDataIterBeforeFirst(DataIterHandle handle); + /*! * \brief get the handle to the NArray of underlying data * \param handle the handle pointer to the data iterator * \param out handle to underlying data NArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOGetData(DataIterHandle handle, +MXNET_DLL int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out); /*! * \brief get the handle to the NArray of underlying label @@ -506,39 +524,7 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, * \param out the handle to underlying label NArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOGetLabel(DataIterHandle handle, +MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); -/*! - * \brief list all the available iterator entries - * \param out_size the size of returned iterators - * \param out_array the output iteratos entries - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array); -/*! 
- * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOIterGetName(DataIterCreator iter, - const char **out_name); -/*! - * \brief create an iterator, init with parameters - * the array size of passed in arguments - * \param creator IOIterator Enrty - * \param num_param number of parameter - * \param keys parameter keys - * \param vals parameter values - * \param out pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - const char **vals, - DataIterHandle *out); - #endif // MXNET_C_API_H_ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 600978023b5b..4ca5ed05fd18 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -27,10 +27,10 @@ class IIterator : public dmlc::DataIter { */ virtual void SetParam(const char *name, const char *val) = 0; /*! - * \brief init the parameter + * \brief set the parameters and init iter * \param kwargs key-value pairs */ - virtual void InitParams(const std::vector >& kwargs) = 0; + virtual void SetInit(const std::vector >& kwargs) = 0; /*! \brief initalize the iterator so that we can use the iterator */ virtual void Init(void) = 0; /*! \brief set before first of the item */ diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 77748dd1950c..75cfd2d88675 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -12,6 +12,7 @@ from .context import Context, current_context from . import narray from . import symbol +from . 
import io __version__ = "0.1.0" diff --git a/python/mxnet/io.py b/python/mxnet/io.py index ead49f07c4fd..baee99a02d61 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -4,6 +4,7 @@ from __future__ import absolute_import import ctypes +import sys from .base import _LIB from .base import c_array, c_str, mx_uint, string_types from .base import DataIterHandle, NArrayHandle @@ -25,65 +26,42 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) + + def __call__(self, *args, **kwargs): + """Invoke iterator as function on inputs. Init params. - - - - - - - - - def __init__(self): - """initialize a new dataiter - - """ - self._datahandle = None - - def createfromcfg(self, cfg_path): - """create a dataiter from config file - - cfg_path is the path of configure file - """ - hdl = DataIterHandle() - check_call(_LIB.MXIOCreateFromConfig(ctypes.c_char_p(cfg_path), ctypes.byref(hdl))) - self._datahandle = hdl - - def createbyname(self, iter_name): - """create a dataiter by the name - - iter_name can be mnist imgrec or so on - """ - hdl = DataIterHandle() - check_call(_LIB.MXIOCreateByName(ctypes.c_char_p(iter_name), ctypes.byref(hdl))) - self._datahandle = hdl - - def setparam(self, name, val): - """set param value for dataiter - - name prameter name - val parameter value - """ - check_call(_LIB.MXIOSetParam(self._datahandle, ctypes.c_char_p(name), ctypes.c_char_p(val))) - - def init(self): - """init dataiter - + Parameters + --------- + args: + provide positional arguments, should not be given. 
+ + kwargs: + provide keyword arguments + Returns + ------- + the inited iterator """ - check_call(_LIB.MXIOInit(self._datahandle)) + if len(args) != 0: + raise TypeError('data iterator only accept \ + keyword arguments') + num_args = len(kwargs) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) + vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) + check_call(_LIB.MXDataIterSetInit( \ + self.handle, num_args, keys, vals)) def beforefirst(self): """set loc to 0 """ - check_call(_LIB.MXIOBeforeFirst(self._datahandle)) + check_call(_LIB.MXDataIterBeforeFirst(self.handle)) def next(self): """init dataiter """ next_res = ctypes.c_int(0) - check_call(_LIB.MXIONext(self._datahandle, ctypes.byref(next_res))) + check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) return next_res.value def getdata(self): @@ -91,7 +69,7 @@ def getdata(self): """ hdl = NArrayHandle() - check_call(_LIB.MXIOGetData(self._datahandle, ctypes.byref(hdl))) + check_call(_LIB.MXDataIterGetData(self.handle, ctypes.byref(hdl))) return NArray(hdl) def getlabel(self): @@ -99,5 +77,97 @@ def getlabel(self): """ hdl = NArrayHandle() - check_call(_LIB.MXIOGetLabel(self._datahandle, ctypes.byref(hdl))) + check_call(_LIB.MXDataIterGetLabel(self.handle, ctypes.byref(hdl))) return NArray(hdl) + +def _make_io_iterator(handle): + """Create an io iterator by handle.""" + name = ctypes.c_char_p() + desc = ctypes.c_char_p() + num_args = mx_uint() + arg_names = ctypes.POINTER(ctypes.c_char_p)() + arg_types = ctypes.POINTER(ctypes.c_char_p)() + arg_descs = ctypes.POINTER(ctypes.c_char_p)() + + check_call(_LIB.MXDataIterGetIterInfo( \ + handle, ctypes.byref(name), ctypes.byref(desc), \ + ctypes.byref(num_args), \ + ctypes.byref(arg_names), \ + ctypes.byref(arg_types), \ + ctypes.byref(arg_descs))) + iter_name = name.value + param_str = [] + for i in range(num_args.value): + ret = '%s : %s' % (arg_names[i], arg_types[i]) + if len(arg_descs[i]) != 0: + ret += 
'\n ' + arg_descs[i] + param_str.append(ret) + + doc_str = ('%s\n\n' + + 'Parameters\n' + + '----------\n' + + '%s\n' + + 'name : string, required.\n' + + ' Name of the resulting data iterator.\n\n' + + 'Returns\n' + + '-------\n' + + 'iterator: Iterator\n'+ + ' The result iterator.') + doc_str = doc_str % (desc.value, '\n'.join(param_str)) + + def creator(*args, **kwargs): + """Create an iterator. + The parameters listed below can be passed in as keyword arguments. + + Parameters + ---------- + name : string, required. + Name of the resulting data iterator. + + Returns + ------- + symbol: Symbol + the resulting symbol + """ + param_keys = [] + param_vals = [] + symbol_kwargs = {} + name = kwargs.pop('name', None) + + for k, v in kwargs.items(): + param_keys.append(c_str(k)) + param_vals.append(c_str(str(v))) + # create atomic symbol + param_keys = c_array(ctypes.c_char_p, param_keys) + param_vals = c_array(ctypes.c_char_p, param_vals) + iter_handle = DataIterHandle() + check_call(_LIB.MXDataIterCreateIter( + handle, len(param_keys), + param_keys, param_vals, + ctypes.byref(iter_handle))) + + if len(args): + raise TypeError('%s can only accept keyword arguments' % iter_name) + + return DataIter(iter_handle) + + creator.__name__ = iter_name + creator.__doc__ = doc_str + return creator + + +def _init_io_module(): + """List and add all the data iterators to current module.""" + plist = ctypes.POINTER(ctypes.c_void_p)() + size = ctypes.c_uint() + + check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) + + module_obj = sys.modules[__name__] + for i in range(size.value): + hdl = ctypes.c_void_p(plist[i]) + dataiter = _make_io_iterator(hdl) + setattr(module_obj, dataiter.__name__, dataiter) + +# Initialize the io in startups +_init_io_module() diff --git a/python/test_io.py b/python/test_io.py index 6909176d11c2..d15d4cc32fcd 100644 --- a/python/test_io.py +++ b/python/test_io.py @@ -1,22 +1,21 @@ #pylint: skip-file import mxnet as mx +import numpy as 
np +import os -dataiter = mx.io.DataIter() -#a.createfromcfg('/home/tianjun/mxnet/mxnet/MNIST.conf') -dataiter.createbyname('mnist') -dataiter.setparam('path_img', "/home/tianjun/data/mnist/train-images-idx3-ubyte") -dataiter.setparam('path_label', "/home/tianjun/data/mnist/train-labels-idx1-ubyte") -dataiter.setparam('shuffle', '1') -dataiter.setparam('seed_data', '2') -dataiter.setparam('batch_size', '100') - -dataiter.init() +dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") dataiter.beforefirst() -for i in range(100): - dataiter.next() - info = "Batch %d" % (i) +idx = 0 +while dataiter.next(): + info = "Batch %d" % (idx) + idx += 1 print info - label = dataiter.getdata() + ''' + label = dataiter.getlabel() print label.numpy + ''' + diff --git a/python/test_mnist.py b/python/test_mnist.py index 3a3ee85a8d3f..fa0f29a60033 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -14,7 +14,7 @@ def Softmax(x): def CalAcc(out, label): pred = np.argmax(out, axis=1) - return np.sum(pred == label) * 1.0 / out.shape[0] + return np.sum(pred == label.transpose()) * 1.0 / out.shape[0] def SetGradient(out_grad, label): assert(out_grad.shape[0] == label.shape[0]) @@ -22,45 +22,6 @@ def SetGradient(out_grad, label): k = label[i] out_grad[i][k] -= 1.0 -# load data -class MNISTIter(object): - def __init__(self, which_set, batch_size=100): - if not os.path.exists('mnist.pkl.gz'): - os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") - f = gzip.open('mnist.pkl.gz', 'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - if which_set == 'train': - self.data = train_set[0] - self.label = np.asarray(train_set[1]) - elif which_set == 'valid': - self.data = valid_set[0] - self.label = np.asarray(valid_set[1]) - else: - self.data = test_set[0] - self.data = np.asarray(test_set[1]) - 
self.batch_size = batch_size - self.nbatch = self.data.shape[0] / batch_size - assert(self.data.shape[0] % batch_size == 0) # I am lazy - self.now_idx = -1 - def BeforeFirst(self): - self.now_idx = -1 - def Next(self): - self.now_idx += 1 - if self.now_idx == self.nbatch: - return False - return True - def Get(self): - if self.now_idx < 0: - raise Exception("Iterator is at head") - elif self.now_idx >= self.nbatch: - raise Exception("Iterator is at end") - start = self.now_idx * self.batch_size - end = (self.now_idx + 1) * self.batch_size - return (self.data[start:end, :], self.label[start:end]) - - - # symbol net batch_size = 100 data = mx.symbol.Variable('data') @@ -69,7 +30,7 @@ def Get(self): fc2 = mx.symbol.FullyConnected(data = act1, name='fc2', num_hidden=10) args_list = fc2.list_arguments() # infer shape -data_shape = (batch_size, 784) +data_shape = (batch_size, 1, 1, 784) arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] @@ -104,20 +65,30 @@ def Update(mom, grad, weight): block = zip(mom_narrays, grad_narrays, arg_narrays) -train = MNISTIter("train", batch_size) -valid = MNISTIter("valid", batch_size) +train_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") +train_dataiter.beforefirst() +val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") +val_dataiter.beforefirst() for i in xrange(epoch): # train print "Epoch %d" % i train_acc = 0.0 val_acc = 0.0 - while train.Next(): - data, label = train.Get() - inputs["data"].numpy[:] = data + train_nbatch = 0 + val_nbatch = 0 + while train_dataiter.next(): + 
data = train_dataiter.getdata() + label = train_dataiter.getlabel().numpy.astype(np.int32) + inputs["data"].numpy[:] = data.numpy executor.forward() out_narray.numpy[:] = Softmax(out_narray.numpy) train_acc += CalAcc(out_narray.numpy, label) + train_nbatch += 1 grad_narray.numpy[:] = out_narray.numpy SetGradient(grad_narray.numpy, label) executor.backward([grad_narray]) @@ -126,15 +97,17 @@ def Update(mom, grad, weight): Update(mom, grad, weight) # evaluate - while valid.Next(): - data, label = valid.Get() - inputs["data"].numpy[:] = data + while val_dataiter.next(): + data = val_dataiter.getdata() + label = val_dataiter.getlabel().numpy.astype(np.int32) + inputs["data"].numpy[:] = data.numpy executor.forward() val_acc += CalAcc(out_narray.numpy, label) - print "Train Acc: ", train_acc / train.nbatch - print "Valid Acc: ", val_acc / valid.nbatch - train.BeforeFirst() - valid.BeforeFirst() + val_nbatch += 1 + print "Train Acc: ", train_acc / train_nbatch + print "Valid Acc: ", val_acc / val_nbatch + train_dataiter.beforefirst() + val_dataiter.beforefirst() diff --git a/src/c_api.cc b/src/c_api.cc index f1ba4abb6233..86425c4290b8 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -599,95 +599,86 @@ int MXExecutorBind(SymbolHandle symbol_handle, API_END(); } -int MXIOCreateFromConfig(const char *cfg, DataIterHandle *out) { +//-------------------------------------------- +// Part 5: IO Interface +//-------------------------------------------- +int MXListDataIters(mx_uint *out_size, + DataIterCreator **out_array) { API_BEGIN(); - *out = static_cast(CreateIteratorFromConfig(cfg)); + auto &vec = dmlc::Registry::List(); + *out_size = static_cast(vec.size()); + *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) API_END(); } -int MXIOCreateByName(const char *iter_name, DataIterHandle *out) { +int MXDataIterGetName(DataIterCreator iter, + const char **out_name) { API_BEGIN(); - *out = static_cast(CreateIteratorByName(iter_name)); + auto *f = static_cast(iter); + 
*out_name = f->name.c_str(); API_END(); } -int MXIOSetParam(DataIterHandle handle, const char *name, const char *val) { +int MXDataIterGetIterInfo(DataIterCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions) { + DataIteratorReg *e = static_cast(creator); + return MXAPIGetFunctionRegInfo(e, name, description, num_args, + arg_names, arg_type_infos, arg_descriptions); +} + +int MXDataIterCreateIter(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out) { + IIterator *iter = nullptr; API_BEGIN(); - static_cast* >(handle)->SetParam(name, val); - API_END(); + DataIteratorReg *e = static_cast(creator); + iter = e->body(); + std::vector > kwargs; + for (int i = 0; i < num_param; ++i) { + kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); + } + iter->SetInit(kwargs); + *out = iter; + API_END_HANDLE_ERROR(delete iter); } -int MXIOInit(DataIterHandle handle) { +int MXDataIterFree(DataIterHandle handle) { API_BEGIN(); - static_cast* >(handle)->Init(); + delete static_cast *>(handle); API_END(); } -int MXIOBeforeFirst(DataIterHandle handle) { +int MXDataIterBeforeFirst(DataIterHandle handle) { API_BEGIN(); static_cast* >(handle)->BeforeFirst(); API_END(); } -int MXIONext(DataIterHandle handle, int *out) { +int MXDataIterNext(DataIterHandle handle, int *out) { API_BEGIN(); *out = static_cast* >(handle)->Next(); API_END(); } -int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out) { +int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out) { API_BEGIN(); DataBatch db = static_cast* >(handle)->Value(); *out = new NArray(db.data[1], 0); API_END(); } -int MXIOGetData(DataIterHandle handle, NArrayHandle *out) { +int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out) { API_BEGIN(); DataBatch db = static_cast* >(handle)->Value(); *out = new NArray(db.data[0], 0); API_END(); } 
-int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array) { - API_BEGIN(); - auto &vec = dmlc::Registry::List(); - *out_size = static_cast(vec.size()); - *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) - API_END(); -} - -int MXIOIterGetName(DataIterCreator iter, - const char **out_name) { - API_BEGIN(); - auto *f = static_cast(iter); - *out_name = f->name.c_str(); - API_END(); -} - -int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - const char **vals, - DataIterHandle *out) { - DataIteratorReg *e = static_cast(creator); - IIterator *iter = e->body(); - API_BEGIN(); - std::vector > kwargs; - for (int i = 0; i < num_param; ++i) { - kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); - } - iter->InitParams(kwargs); - *out = iter; - API_END_HANDLE_ERROR(delete iter); -} - -int MXDataIterFree(DataIterHandle iter) { - API_BEGIN(); - delete static_cast *>(symbol); - API_END(); -} - - diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 6a705c483e5b..88a3e4d82acd 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -106,9 +106,10 @@ class MNISTIterator: public IIterator { virtual const DataBatch &Value(void) const { return out_; } - virtual void InitParams(const std::vector >& kwargs) { + virtual void SetInit(const std::vector >& kwargs) { std::map kmap(kwargs.begin(), kwargs.end()); param.Init(kmap); + this->Init(); } private: inline void LoadImage(void) { diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index c6fab8d376d7..d6119d6c8a69 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -10,7 +10,7 @@ namespace mxnet { namespace io { DMLC_REGISTER_PARAMETER(MNISTParam); -MXNET_REGISTER_IO_ITER(MNIST, MNISTIterator) +MXNET_REGISTER_IO_ITER(MNISTIterator, MNISTIterator) .describe("Create MNISTIterator") .add_arguments(MNISTParam::__FIELDS__()); From 4f6ef5d79754e956df8d686c832d03e243fa9912 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 
Aug 2015 04:07:14 +0800 Subject: [PATCH 06/61] clean io interface --- include/mxnet/io.h | 25 --------------------- python/mxnet/io.py | 14 +++++------- python/test_mnist.py | 4 +--- src/common/utils.h | 6 ++++++ src/io/io.cc | 48 ----------------------------------------- src/io/iter_mnist-inl.h | 25 +++++++++++---------- 6 files changed, 24 insertions(+), 98 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 4ca5ed05fd18..ac22919745a1 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -20,12 +20,6 @@ namespace mxnet { template class IIterator : public dmlc::DataIter { public: - /*! - * \brief set the parameter - * \param name name of parameter - * \param val value of parameter - */ - virtual void SetParam(const char *name, const char *val) = 0; /*! * \brief set the parameters and init iter * \param kwargs key-value pairs @@ -89,25 +83,6 @@ struct DataBatch { void Naming(std::vector names); }; // struct DataBatch -/*! - * \brief create the databatch iterator IIterator - * \param cfg configure settings key=vale pair - * \return the data IIterator ptr - */ -IIterator *CreateIterator(const std::vector > &cfg); -/*! - * \brief create the databatch iterator IIterator from config file - * \param cfg_path configure file path - * \return the data IIterator ptr - */ -IIterator *CreateIteratorFromConfig(const char* cfg_path); -/*! - * \brief create the databatch iterator IIterator by iter name - * \param iter_name can be mnist, imgrec and so on - * \return the data IIterator ptr - */ -IIterator *CreateIteratorByName(const char* iter_name); - /*! \brief typedef the factory function of data iterator */ typedef IIterator *(*DataIteratorFactory)(); /*! 
diff --git a/python/mxnet/io.py b/python/mxnet/io.py index baee99a02d61..dba36bd2114c 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -6,7 +6,7 @@ import ctypes import sys from .base import _LIB -from .base import c_array, c_str, mx_uint, string_types +from .base import c_array, c_str, mx_uint from .base import DataIterHandle, NArrayHandle from .base import check_call from .narray import NArray @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. @@ -43,9 +43,9 @@ def __call__(self, *args, **kwargs): """ if len(args) != 0: raise TypeError('data iterator only accept \ - keyword arguments') + keyword arguments') num_args = len(kwargs) - keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) check_call(_LIB.MXDataIterSetInit( \ self.handle, num_args, keys, vals)) @@ -131,8 +131,6 @@ def creator(*args, **kwargs): """ param_keys = [] param_vals = [] - symbol_kwargs = {} - name = kwargs.pop('name', None) for k, v in kwargs.items(): param_keys.append(c_str(k)) @@ -160,9 +158,7 @@ def _init_io_module(): """List and add all the data iterators to current module.""" plist = ctypes.POINTER(ctypes.c_void_p)() size = ctypes.c_uint() - - check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) - + check_call(_LIB.MXListDataIters(ctypes.byref(size), ctypes.byref(plist))) module_obj = sys.modules[__name__] for i in range(size.value): hdl = ctypes.c_void_p(plist[i]) diff --git a/python/test_mnist.py b/python/test_mnist.py index fa0f29a60033..8c3e09ba3705 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -67,7 +67,7 @@ def Update(mom, grad, weight): train_dataiter = 
mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat") + batch_size=100, shuffle=1, silent=1, input_flat="flat", seed_data=1) train_dataiter.beforefirst() val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", @@ -109,5 +109,3 @@ def Update(mom, grad, weight): train_dataiter.beforefirst() val_dataiter.beforefirst() - - diff --git a/src/common/utils.h b/src/common/utils.h index f55ebc26535f..cf1fd2f1bb36 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -10,12 +10,18 @@ #include #include #include +#include #endif // DMLC_USE_CXX11 namespace common { #if DMLC_USE_CXX11 +/*! + * \brief Random Engine + */ +typedef std::mt19937 RANDOM_ENGINE; + /*! * \brief Helper functions. */ diff --git a/src/io/io.cc b/src/io/io.cc index aafe85073a52..fb7a8c2d3092 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -11,55 +11,7 @@ #include #include #include "iter_mnist-inl.h" -#include "../utils/random.h" namespace dmlc { DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); } // namespace dmlc - -namespace mxnet { - IIterator *CreateIterator( - const std::vector< std::pair > &cfg) { - size_t i = 0; - IIterator *it = NULL; - for (; i < cfg.size(); ++i) { - const char *name = cfg[i].first.c_str(); - const char *val = cfg[i].second.c_str(); - if (!strcmp(name, "iter")) { - if (!strcmp(val, "mnist")) { - CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new io::MNISTIterator(); continue; - } - CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; - } - if (it != NULL) { - it->SetParam(name, val); - } - } - CHECK(it != NULL) << "must specify iterator by iter=itername"; - return it; - } - - IIterator *CreateIteratorFromConfig(const char* cfg_path) { - std::ifstream ifs(cfg_path, std::ifstream::in); - 
std::vector< std::pair< std::string, std::string> > itcfg; - dmlc::Config cfg(ifs); - for (dmlc::Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { - dmlc::Config::ConfigEntry ent = *iter; - itcfg.push_back(std::make_pair(ent.first, ent.second)); - } - // Get the data and init - return CreateIterator(itcfg); - } - - IIterator *CreateIteratorByName(const char* iter_name) { - IIterator *it = NULL; - // Currently only support mnist - if (!strcmp(iter_name, "mnist")) { - CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new io::MNISTIterator(); - } - CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; - return it; - } -} // namespace mxnet diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 88a3e4d82acd..ef2348488396 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -13,7 +13,9 @@ #include #include #include -#include "../utils/random.h" +#include +#include +#include "../common/utils.h" namespace mxnet { namespace io { @@ -29,6 +31,8 @@ struct MNISTParam : public dmlc::Parameter { int batch_size; /*! \brief data mode */ int input_flat; + /*! 
\brief random seed */ + int seed_data; // declare parameters in header file DMLC_DECLARE_PARAMETER(MNISTParam) { DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") @@ -36,33 +40,29 @@ struct MNISTParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") .describe("Mnist label path."); DMLC_DECLARE_FIELD(shuffle).set_default(false) - .describe("Whether to shuffle data."); + .describe("Whether to shuffle data."); DMLC_DECLARE_FIELD(silent).set_default(false) - .describe("Whether to print out data info."); + .describe("Whether to print out data info."); DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) .describe("Batch Size."); DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) .add_enum("noflat", 0).set_default(1) .describe("Whether to flat the data into 1D."); + DMLC_DECLARE_FIELD(seed_data).set_default(0) + .describe("Random Seed."); } }; - + class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; inst_offset_ = 0; - rnd.Seed(kRandMagic); out_.data.resize(2); } virtual ~MNISTIterator(void) { if (img_.dptr_ != NULL) delete []img_.dptr_; } - virtual void SetParam(const char *name, const char *val) { - std::map kwargs; - kwargs[name] = val; - param.Init(kwargs); - } // intialize iterator loads data in virtual void Init(void) { this->LoadImage(); @@ -111,6 +111,7 @@ class MNISTIterator: public IIterator { param.Init(kmap); this->Init(); } + private: inline void LoadImage(void) { dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); @@ -151,7 +152,7 @@ class MNISTIterator: public IIterator { delete stdlabel; } inline void Shuffle(void) { - rnd.Shuffle(&inst_); + std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param.seed_data)); std::vector tmplabel(labels_.size()); mshadow::TensorContainer tmpimg(img_.shape_); for (size_t i = 0; i < inst_.size(); ++i) { @@ -191,8 +192,6 @@ class MNISTIterator: public 
IIterator { unsigned inst_offset_; /*! \brief instance index */ std::vector inst_; - // random sampler - utils::RandomSampler rnd; // magic number to setup randomness static const int kRandMagic = 0; }; // class MNISTIterator From 12b3d22d4823a44004f8784f813bed754dea652b Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:40:01 +0800 Subject: [PATCH 07/61] finish merge remote master --- Makefile | 4 +- include/mxnet/io.h | 105 +++++++++++++++++++++++ src/io/inst_vector.h | 117 ++++++++++++++++++++++++++ src/io/io.cc | 60 +++++++++++++ src/io/iter_mnist-inl.h | 181 ++++++++++++++++++++++++++++++++++++++++ test/io_mnist_test.cc | 96 +++++++++++++++++++++ 6 files changed, 562 insertions(+), 1 deletion(-) create mode 100644 include/mxnet/io.h create mode 100644 src/io/inst_vector.h create mode 100644 src/io/io.cc create mode 100644 src/io/iter_mnist-inl.h create mode 100644 test/io_mnist_test.cc diff --git a/Makefile b/Makefile index e95ee067980f..b8f0bd899941 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o +OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o io.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -101,11 +101,13 @@ pooling_cpu.o: src/operator/pooling.cc pooling_gpu.o: src/operator/pooling.cu softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu +io.o: src/io/io.cc lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) test/test_storage: test/test_storage.cc lib/libmxnet.a +test/io_mnist_test: test/io_mnist_test.cc 
lib/libmxnet.a $(DMLC_CORE)/libdmlc.a #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/include/mxnet/io.h b/include/mxnet/io.h new file mode 100644 index 000000000000..29dccbace770 --- /dev/null +++ b/include/mxnet/io.h @@ -0,0 +1,105 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file io.h + * \brief mxnet io data structure and data iterator + */ +#ifndef MXNET_IO_H_ +#define MXNET_IO_H_ +#include +#include +#include +#include +#include "./base.h" + +namespace mxnet { +/*! + * \brief iterator type + * \tparam DType data type + */ +template +class IIterator : public dmlc::DataIter { + public: + /*! + * \brief set the parameter + * \param name name of parameter + * \param val value of parameter + */ + virtual void SetParam(const char *name, const char *val) = 0; + /*! \brief initalize the iterator so that we can use the iterator */ + virtual void Init(void) = 0; + /*! \brief set before first of the item */ + virtual void BeforeFirst(void) = 0; + /*! \brief move to next item */ + virtual bool Next(void) = 0; + /*! \brief get current data */ + virtual const DType &Value(void) const = 0; + /*! \brief constructor */ + virtual ~IIterator(void) {} + /*! \brief store the name of each data, it could be used for making NArrays */ + std::vector data_names; + /*! \brief set data name to each attribute of data */ + inline void SetDataName(const std::string data_name){ + data_names.push_back(data_name); + } +}; // class IIterator + +/*! \brief a single data instance */ +struct DataInst { + /*! \brief unique id for instance */ + unsigned index; + /*! \brief content of data */ + std::vector data; + /*! \brief extra data to be fed to the network */ + std::string extra_data; +}; // struct DataInst + +/*! + * \brief a standard batch of data commonly used by iterator + * a databatch contains multiple TBlobs. Each Tblobs has + * a name stored in a map. 
There's no different between + * data and label, how we use them is to see the DNN implementation. + */ +struct DataBatch { + public: + /*! \brief unique id for instance, can be NULL, sometimes is useful */ + unsigned *inst_index; + /*! \brief number of instance */ + mshadow::index_t batch_size; + /*! \brief number of padding elements in this batch, + this is used to indicate the last elements in the batch are only padded up to match the batch, and should be discarded */ + mshadow::index_t num_batch_padd; + public: + /*! \brief content of dense data, if this DataBatch is dense */ + std::vector data; + /*! \brief extra data to be fed to the network */ + std::string extra_data; + public: + /*! \brief constructor */ + DataBatch(void) { + inst_index = NULL; + batch_size = 0; num_batch_padd = 0; + } + /*! \brief giving name to the data */ + void Naming(std::vector names); +}; // struct DataBatch + +/*! + * \brief create the databatch iterator IIterator + * \param cfg configure settings key=vale pair + * \return the data IIterator ptr + */ +IIterator *CreateIterator(const std::vector > &cfg); +/*! + * \brief create the databatch iterator IIterator from config file + * \param cfg_path configure file path + * \return the data IIterator ptr + */ +IIterator *CreateIteratorFromConfig(const char* cfg_path); +/*! + * \brief create the databatch iterator IIterator by iter name + * \param iter_name can be mnist, imgrec and so on + * \return the data IIterator ptr + */ +IIterator *CreateIteratorByName(const char* iter_name); +} // namespace mxnet +#endif // MXNET_IO_H_ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h new file mode 100644 index 000000000000..1ae734631680 --- /dev/null +++ b/src/io/inst_vector.h @@ -0,0 +1,117 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \inst_vector.h + * \brief holder of a sequence of DataInst in CPU + * that are not necessarily of same shape + */ +#ifndef MXNET_IO_INST_VECTOR_H_ +#define MXNET_IO_INST_VECTOR_H_ +#include +#include +#include +#include +#include "./data.h" +namespace mxnet { +/*! + * \brief tensor vector that can store sequence of tensor + * in a memory compact way, tensors do not have to be of same shape + */ +template +class TensorVector { + public: + TensorVector(void) { + this->Clear(); + } + // get i-th tensor + inline mshadow::Tensor + operator[](size_t i) const { + CHECK(i + 1 < offset_.size()); + CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); + return mshadow::Tensor + (reinterpret_cast(BeginPtr(content_)) + offset_[i], shape_[i]); + } + inline mshadow::Tensor Back() const { + return (*this)[Size() - 1]; + } + inline size_t Size(void) const { + return shape_.size(); + } + // push a tensor of certain shape + // return the reference of the pushed tensor + inline void Push(mshadow::Shape shape) { + shape_.push_back(shape); + offset_.push_back(offset_.back() + shape.Size()); + content_.resize(offset_.back()); + } + inline void Clear(void) { + offset_.clear(); + offset_.push_back(0); + content_.clear(); + shape_.clear(); + } + + private: + // offset of the data content + std::vector offset_; + // data content + std::vector content_; + // shape of data + std::vector > shape_; +}; + +/*! 
+ * \brief tblob vector that can store sequence of tblob + * in a memory compact way, tblobs do not have to be of same shape + */ +template +class TBlobVector { + public: + TBlobVector(void) { + this->Clear(); + } + // get i-th tblob + inline TBlob operator[](size_t i) const; + // get the last tblob + inline TBlob Back(); + // return the size of the vector + inline size_t Size(void) const; + // push a tensor of certain shape + // return the reference of the pushed tensor + inline void Push(TShape shape_); + inline void Clear(void); + private: + // offset of the data content + std::vector offset_; + // data content + std::vector content_; + // shape of data + std::vector shape_; +}; + +/*! + * \brief instance vector that can holds + * non-uniform shape data instance in a shape efficient way + */ +class InstVector { + public: + inline size_t Size(void) const { + return index_.size(); + } + // instance + inline DataInst operator[](size_t i) const; + // get back of instance vector + inline DataInst Back() const; + // clear the container + inline void Clear(void); + // push the newly coming instance + inline void Push(unsigned index, TBlob data_); + + private: + /*! 
\brief index of the data */ + std::vector index_; + // data + std::vector > data_; + // extra data + std::vector extra_data_; +}; +#endif // MXNET_IO_INST_VECTOR_H_ diff --git a/src/io/io.cc b/src/io/io.cc new file mode 100644 index 000000000000..2df16e4fc209 --- /dev/null +++ b/src/io/io.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2015 by Contributors +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE + +#include +#include +#include +#include +#include +#include +#include +#include "iter_mnist-inl.h" +#include "../utils/random.h" + +namespace mxnet { + IIterator *CreateIterator( + const std::vector< std::pair > &cfg) { + size_t i = 0; + IIterator *it = NULL; + for (; i < cfg.size(); ++i) { + const char *name = cfg[i].first.c_str(); + const char *val = cfg[i].second.c_str(); + if (!strcmp(name, "iter")) { + if (!strcmp(val, "mnist")) { + CHECK(it == NULL) << "mnist cannot chain over other iterator"; + it = new MNISTIterator(); continue; + } + CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; + } + if (it != NULL) { + it->SetParam(name, val); + } + } + CHECK(it != NULL) << "must specify iterator by iter=itername"; + return it; + } + + IIterator *CreateIteratorFromConfig(const char* cfg_path) { + std::ifstream ifs(cfg_path, std::ifstream::in); + std::vector< std::pair< std::string, std::string> > itcfg; + dmlc::Config cfg(ifs); + for (dmlc::Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { + dmlc::Config::ConfigEntry ent = *iter; + itcfg.push_back(std::make_pair(ent.first, ent.second)); + } + // Get the data and init + return CreateIterator(itcfg); + } + + IIterator *CreateIteratorByName(const char* iter_name) { + IIterator *it = NULL; + // Currently only support mnist + if (!strcmp(iter_name, "mnist")) { + CHECK(it == NULL) << "mnist cannot chain over other iterator"; + it = new MNISTIterator(); + } + CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; + return it; + } +} // namespace mxnet 
diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h new file mode 100644 index 000000000000..376838fcf3f0 --- /dev/null +++ b/src/io/iter_mnist-inl.h @@ -0,0 +1,181 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file iter_mnist-inl.h + * \brief iterator that takes mnist dataset + */ +#ifndef MXNET_IO_ITER_MNIST_INL_H_ +#define MXNET_IO_ITER_MNIST_INL_H_ +#include +#include +#include +#include +#include +#include +#include +#include "../utils/random.h" + +namespace mxnet { +class MNISTIterator: public IIterator { + public: + MNISTIterator(void) { + img_.dptr_ = NULL; + mode_ = 1; + inst_offset_ = 0; + silent_ = 0; + shuffle_ = 0; + rnd.Seed(kRandMagic); + out_.data.resize(2); + } + virtual ~MNISTIterator(void) { + if (img_.dptr_ != NULL) delete []img_.dptr_; + } + virtual void SetParam(const char *name, const char *val) { + if (!strcmp(name, "silent")) silent_ = atoi(val); + if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); + if (!strcmp(name, "input_flat")) mode_ = atoi(val); + if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); + if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "path_label")) path_label = val; + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + } + // intialize iterator loads data in + virtual void Init(void) { + this->LoadImage(); + this->LoadLabel(); + // set name + this->SetDataName(std::string("data")); + this->SetDataName(std::string("label")); + if (mode_ == 1) { + batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, 1, img_.size(1) * img_.size(2)); + } else { + batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, img_.size(1), img_.size(2)); + } + out_.inst_index = NULL; + batch_label_.shape_ = mshadow::Shape2(batch_size_, 1); + batch_label_.stride_ = 1; + batch_data_.stride_ = batch_data_.size(3); + out_.batch_size = batch_size_; + if (shuffle_) this->Shuffle(); 
+ if (silent_ == 0) { + mshadow::Shape<4> s = batch_data_.shape_; + printf("MNISTIterator: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", + (unsigned)img_.size(0), shuffle_, s[0], s[1], s[2], s[3]); + } + } + virtual void BeforeFirst(void) { + this->loc_ = 0; + } + virtual bool Next(void) { + if (loc_ + batch_size_ <= img_.size(0)) { + batch_data_.dptr_ = img_[loc_].dptr_; + batch_label_.dptr_ = &labels_[loc_]; + out_.data[0] = TBlob(batch_data_); + out_.data[1] = TBlob(batch_label_); + out_.inst_index = &inst_[loc_]; + loc_ += batch_size_; + return true; + } else { + return false; + } + } + virtual const DataBatch &Value(void) const { + return out_; + } + + private: + inline void LoadImage(void) { + dmlc::Stream *stdimg = dmlc::Stream::Create(path_img.c_str(), "r"); + ReadInt(stdimg); + int image_count = ReadInt(stdimg); + int image_rows = ReadInt(stdimg); + int image_cols = ReadInt(stdimg); + + img_.shape_ = mshadow::Shape3(image_count, image_rows, image_cols); + img_.stride_ = img_.size(2); + + // allocate continuous memory + img_.dptr_ = new float[img_.MSize()]; + for (int i = 0; i < image_count; ++i) { + for (int j = 0; j < image_rows; ++j) { + for (int k = 0; k < image_cols; ++k) { + unsigned char ch; + CHECK(stdimg->Read(&ch, sizeof(ch) != 0)); + img_[i][j][k] = ch; + } + } + } + // normalize to 0-1 + img_ *= 1.0f / 256.0f; + delete stdimg; + } + inline void LoadLabel(void) { + dmlc::Stream *stdlabel = dmlc::Stream::Create(path_label.c_str(), "r"); + ReadInt(stdlabel); + int labels_count = ReadInt(stdlabel); + labels_.resize(labels_count); + for (int i = 0; i < labels_count; ++i) { + unsigned char ch; + CHECK(stdlabel->Read(&ch, sizeof(ch) != 0)); + labels_[i] = ch; + inst_.push_back((unsigned)i + inst_offset_); + } + delete stdlabel; + } + inline void Shuffle(void) { + rnd.Shuffle(&inst_); + std::vector tmplabel(labels_.size()); + mshadow::TensorContainer tmpimg(img_.shape_); + for (size_t i = 0; i < inst_.size(); ++i) { + unsigned ridx = inst_[i] - 
inst_offset_; + mshadow::Copy(tmpimg[i], img_[ridx]); + tmplabel[i] = labels_[ridx]; + } + // copy back + mshadow::Copy(img_, tmpimg); + labels_ = tmplabel; + } + + private: + inline static int ReadInt(dmlc::Stream *fi) { + unsigned char buf[4]; + CHECK(fi->Read(buf, sizeof(buf)) == sizeof(buf)) + << "invalid mnist format"; + return reinterpret_cast(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); + } + + private: + /*! \brief silent */ + int silent_; + /*! \brief path */ + std::string path_img, path_label; + /*! \brief output */ + DataBatch out_; + /*! \brief whether do shuffle */ + int shuffle_; + /*! \brief data mode */ + int mode_; + /*! \brief current location */ + index_t loc_; + /*! \brief batch size */ + index_t batch_size_; + /*! \brief image content */ + mshadow::Tensor img_; + /*! \brief label content */ + std::vector labels_; + /*! \brief batch data tensor */ + mshadow::Tensor batch_data_; + /*! \brief batch label tensor */ + mshadow::Tensor batch_label_; + /*! \brief instance index offset */ + unsigned inst_offset_; + /*! 
\brief instance index */ + std::vector inst_; + // random sampler + utils::RandomSampler rnd; + // magic number to setup randomness + static const int kRandMagic = 0; +}; // class MNISTIterator +} // namespace mxnet +#endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/test/io_mnist_test.cc b/test/io_mnist_test.cc new file mode 100644 index 000000000000..2bfba24a507a --- /dev/null +++ b/test/io_mnist_test.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2015 by Contributors +// IO test code + +#include +#include +#include +#include +#include +#include "mxnet/io.h" +#include "../src/io/iter_mnist-inl.h" + +using namespace std; +using namespace mxnet; +using namespace dmlc; + +void InitIter(IIterator* itr, + const std::vector< std::pair< std::string, std::string> > &defcfg) { + for (size_t i = 0; i < defcfg.size(); ++i) { + itr->SetParam(defcfg[i].first.c_str(), defcfg[i].second.c_str()); + } + itr->Init(); +} + +IIterator* CreateIterators( + const std::vector< std::pair< std::string, std::string> >& cfg) { + IIterator* data_itr = NULL; + int flag = 0; + std::string evname; + std::vector< std::pair< std::string, std::string> > itcfg; + std::vector< std::pair< std::string, std::string> > defcfg; + for (size_t i = 0; i < cfg.size(); ++i) { + const char *name = cfg[i].first.c_str(); + const char *val = cfg[i].second.c_str(); + if (!strcmp(name, "data")) { + flag = 1; continue; + } + if (!strcmp(name, "eval")) { + flag = 2; continue; + } + if (!strcmp(name, "pred")) { + flag = 3; continue; + } + if (!strcmp(name, "iterend") && !strcmp(val, "true")) { + if (flag == 1) { + data_itr = mxnet::CreateIterator(itcfg); + } + flag = 0; itcfg.clear(); + } + if (flag == 0) { + defcfg.push_back(cfg[i]); + } else { + itcfg.push_back(cfg[i]); + } + } + if (data_itr != NULL) { + InitIter(data_itr, defcfg); + } + return data_itr; +} + +/*! 
+ * Usage: ./io_mnist_test /path/to/io_config/file + * Example + * data = train + * iter = mnist + * path_img = "./data/mnist/train-images-idx3-ubyte" + * path_label = "./data/mnist/train-labels-idx1-ubyte" + * shuffle = 1 + * iterend = true + * input_shape = 1,1,784 + * batch_size = 100 + * + */ + +int main(int argc, char** argv) { + std::ifstream ifs(argv[1], std::ifstream::in); + std::vector< std::pair< std::string, std::string> > itcfg; + Config cfg(ifs); + for (Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { + Config::ConfigEntry ent = *iter; + itcfg.push_back(std::make_pair(ent.first, ent.second)); + } + // Get the data and init + IIterator* data_itr = CreateIterators(itcfg); + data_itr->BeforeFirst(); + int batch_dir = 0; + while (data_itr->Next()) { + std::cout << "Label of Batch " << batch_dir++ << std::endl; + // print label + DataBatch db = data_itr->Value(); + mshadow::Tensor label = db.data[1].get(); + for (size_t i = 0; i < label.shape_.shape_[0]; i++) + std::cout << label.dptr_[i] << " "; + std::cout << "\n"; + } +} From f673f7dd207725afb5feed4e868d9318ea5d7c3a Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sat, 22 Aug 2015 13:05:57 +0800 Subject: [PATCH 08/61] built in python, start polishing new feature required --- Makefile | 4 +-- include/mxnet/c_api.h | 23 ++++++++++++ python/mxnet/base.py | 2 +- python/mxnet/io.py | 84 +++++++++++++++++++++++++++++++++++++++++++ python/test_io.py | 22 ++++++++++++ src/c_api.cc | 1 + 6 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 python/mxnet/io.py create mode 100644 python/test_io.py diff --git a/Makefile b/Makefile index b8f0bd899941..70aac56dc4d5 100644 --- a/Makefile +++ b/Makefile @@ -103,8 +103,8 @@ softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu io.o: src/io/io.cc -lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) -lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) +lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) +lib/libmxnet.so: 
$(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 5802c32cf75c..f1119b3323c5 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -460,6 +460,29 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, */ MXNET_DLL int MXIOCreateFromConfig(const char *cfg, DataIterHandle *out); +/*! + * \brief create an data iterator by name + * \param iter_name iterator name + * \param out the handle to the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOCreateByName(const char *iter_name, + DataIterHandle *out); +/*! + * \brief set parameter value + * \param handle the handle to iterator + * \param name parameter name + * \param val parameter value + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOSetParam(DataIterHandle handle, + const char *name, const char *val); +/*! + * \brief Init after set parameter + * \param handle the handle to iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOInit(DataIterHandle handle); /*! 
* \brief move iterator to next position * \param handle the handle to iterator diff --git a/python/mxnet/base.py b/python/mxnet/base.py index c514d6939988..9d5026f126cf 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -70,7 +70,7 @@ def _load_lib(): SymbolCreatorHandle = ctypes.c_void_p SymbolHandle = ctypes.c_void_p ExecutorHandle = ctypes.c_void_p - +DataIterHandle = ctypes.c_void_p #---------------------------- # helper function definition #---------------------------- diff --git a/python/mxnet/io.py b/python/mxnet/io.py new file mode 100644 index 000000000000..96e4938a79b3 --- /dev/null +++ b/python/mxnet/io.py @@ -0,0 +1,84 @@ +# coding: utf-8 + +"""NArray interface of mxnet""" +from __future__ import absolute_import + +import ctypes +from .base import _LIB +from .base import DataIterHandle, NArrayHandle +from .base import check_call +from .narray import NArray + +class DataIter(object): + """DataIter object in mxnet + + DataIter is a wrapper for C++ DataIter functions + """ + + def __init__(self): + """initialize a new dataiter + + """ + self._datahandle = None + + def createfromcfg(self, cfg_path): + """create a dataiter from config file + + cfg_path is the path of configure file + """ + hdl = DataIterHandle() + check_call(_LIB.MXIOCreateFromConfig(ctypes.c_char_p(cfg_path), ctypes.byref(hdl))) + self._datahandle = hdl + + def createbyname(self, iter_name): + """create a dataiter by the name + + iter_name can be mnist imgrec or so on + """ + hdl = DataIterHandle() + check_call(_LIB.MXIOCreateByName(ctypes.c_char_p(iter_name), ctypes.byref(hdl))) + self._datahandle = hdl + + def setparam(self, name, val): + """set param value for dataiter + + name prameter name + val parameter value + """ + check_call(_LIB.MXIOSetParam(self._datahandle, ctypes.c_char_p(name), ctypes.c_char_p(val))) + + def init(self): + """init dataiter + + """ + check_call(_LIB.MXIOInit(self._datahandle)) + + def beforefirst(self): + """set loc to 0 + + """ + 
check_call(_LIB.MXIOBeforeFirst(self._datahandle)) + + def next(self): + """init dataiter + + """ + next_res = ctypes.c_int(0) + check_call(_LIB.MXIONext(self._datahandle, ctypes.byref(next_res))) + return next_res.value + + def getdata(self): + """get data from batch + + """ + hdl = NArrayHandle() + check_call(_LIB.MXIOGetData(self._datahandle, ctypes.byref(hdl))) + return NArray(hdl) + + def getlabel(self): + """get label from batch + + """ + hdl = NArrayHandle() + check_call(_LIB.MXIOGetLabel(self._datahandle, ctypes.byref(hdl))) + return NArray(hdl) diff --git a/python/test_io.py b/python/test_io.py new file mode 100644 index 000000000000..6909176d11c2 --- /dev/null +++ b/python/test_io.py @@ -0,0 +1,22 @@ +#pylint: skip-file +import mxnet as mx + +dataiter = mx.io.DataIter() +#a.createfromcfg('/home/tianjun/mxnet/mxnet/MNIST.conf') +dataiter.createbyname('mnist') +dataiter.setparam('path_img', "/home/tianjun/data/mnist/train-images-idx3-ubyte") +dataiter.setparam('path_label', "/home/tianjun/data/mnist/train-labels-idx1-ubyte") +dataiter.setparam('shuffle', '1') +dataiter.setparam('seed_data', '2') +dataiter.setparam('batch_size', '100') + +dataiter.init() + +dataiter.beforefirst() + +for i in range(100): + dataiter.next() + info = "Batch %d" % (i) + print info + label = dataiter.getdata() + print label.numpy diff --git a/src/c_api.cc b/src/c_api.cc index b251ba578743..02c231af9f7e 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include From a8dcbd183e89c0d04d7393a3956e922ac59b475c Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:28:47 +0800 Subject: [PATCH 09/61] finish old version registry in C --- Makefile | 5 ++- include/mxnet/c_api.h | 34 +++++++++++++++++ include/mxnet/io.h | 5 +++ python/mxnet/base.py | 1 + src/c_api.cc | 1 + src/io/io.cc | 4 +- src/io/iter_mnist-inl.h | 83 ++++++++++++++++++++++++----------------- src/io/iter_mnist.cc | 17 +++++++++ 8 files changed, 
111 insertions(+), 39 deletions(-) create mode 100644 src/io/iter_mnist.cc diff --git a/Makefile b/Makefile index 70aac56dc4d5..e6d663aadd33 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o io.o +OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o io.o iter_mnist.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -102,12 +102,13 @@ pooling_gpu.o: src/operator/pooling.cu softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu io.o: src/io/io.cc +iter_mnist.o: src/io/iter_mnist.cc lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a -test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a +#test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index f1119b3323c5..88a2934bbbdf 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -36,6 +36,8 @@ typedef void *SymbolHandle; typedef void *AtomicSymbolHandle; /*! \brief handle to an Executor */ typedef void *ExecutorHandle; +/*! \brief handle a dataiter creator */ +typedef void *DataIterCreator; /*! \brief handle to a DataIterator */ typedef void *DataIterHandle; /*! @@ -519,5 +521,37 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, */ MXNET_DLL int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out); +/*! 
+ * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array); +/*! + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOIterGetName(DataIterCreator iter, + const char **out_name); +/*! + * \brief create an iterator, init with parameters + * the array size of passed in arguments + * \param creator IOIterator Enrty + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); + #endif // MXNET_C_API_H_ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 29dccbace770..16c86138abe1 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -25,6 +25,11 @@ class IIterator : public dmlc::DataIter { * \param val value of parameter */ virtual void SetParam(const char *name, const char *val) = 0; + /*! + * \brief init the parameter + * \param kwargs key-value pairs + */ + virtual void InitParams(const std::vector >& kwargs) = 0; /*! \brief initalize the iterator so that we can use the iterator */ virtual void Init(void) = 0; /*! 
\brief set before first of the item */ diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 9d5026f126cf..63f72b87ad59 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -70,6 +70,7 @@ def _load_lib(): SymbolCreatorHandle = ctypes.c_void_p SymbolHandle = ctypes.c_void_p ExecutorHandle = ctypes.c_void_p +DataIterCreatorHandle = ctypes.c_void_p DataIterHandle = ctypes.c_void_p #---------------------------- # helper function definition diff --git a/src/c_api.cc b/src/c_api.cc index 02c231af9f7e..f5f2d12ff66d 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/io/io.cc b/src/io/io.cc index 2df16e4fc209..60c013a812a5 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -23,7 +23,7 @@ namespace mxnet { if (!strcmp(name, "iter")) { if (!strcmp(val, "mnist")) { CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new MNISTIterator(); continue; + it = new io::MNISTIterator(); continue; } CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; } @@ -52,7 +52,7 @@ namespace mxnet { // Currently only support mnist if (!strcmp(iter_name, "mnist")) { CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new MNISTIterator(); + it = new io::MNISTIterator(); } CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; return it; diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 376838fcf3f0..62168f8f1811 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -10,19 +10,42 @@ #include #include #include +#include #include #include #include "../utils/random.h" namespace mxnet { +namespace io { +// Define mnist io parameters +struct MNISTParam : public dmlc::Parameter { + /*! \brief path */ + std::string path_img, path_label; + /*! \brief whether to do shuffle */ + bool shuffle; + /*! \brief whether to print info */ + bool silent; + /*! \brief batch size */ + int batch_size; + /*! 
\brief data mode */ + int input_flat; + // declare parameters in header file + DMLC_DECLARE_PARAMETER(Param) { + DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte"); + DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte"); + DMLC_DECLARE_FIELD(shuffle).set_default(false); + DMLC_DECLARE_FIELD(silent).set_default(false); + DMLC_DECLARE_FIELD(batch_size).set_default(128); + DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) + .add_enum("noflat", 0).set_default(1); + } +}; + class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; - mode_ = 1; inst_offset_ = 0; - silent_ = 0; - shuffle_ = 0; rnd.Seed(kRandMagic); out_.data.resize(2); } @@ -30,15 +53,9 @@ class MNISTIterator: public IIterator { if (img_.dptr_ != NULL) delete []img_.dptr_; } virtual void SetParam(const char *name, const char *val) { - if (!strcmp(name, "silent")) silent_ = atoi(val); - if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); - if (!strcmp(name, "input_flat")) mode_ = atoi(val); - if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); - if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "path_label")) path_label = val; - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + std::map kwargs; + kwargs[name] = val; + param.Init(kwargs); } // intialize iterator loads data in virtual void Init(void) { @@ -47,34 +64,34 @@ class MNISTIterator: public IIterator { // set name this->SetDataName(std::string("data")); this->SetDataName(std::string("label")); - if (mode_ == 1) { - batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, 1, img_.size(1) * img_.size(2)); + if (param.input_flat == 1) { + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, 1, img_.size(1) * img_.size(2)); } else { - batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, img_.size(1), img_.size(2)); + 
batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, img_.size(1), img_.size(2)); } out_.inst_index = NULL; - batch_label_.shape_ = mshadow::Shape2(batch_size_, 1); + batch_label_.shape_ = mshadow::Shape2(param.batch_size, 1); batch_label_.stride_ = 1; batch_data_.stride_ = batch_data_.size(3); - out_.batch_size = batch_size_; - if (shuffle_) this->Shuffle(); - if (silent_ == 0) { + out_.batch_size = param.batch_size; + if (param.shuffle) this->Shuffle(); + if (param.silent == 0) { mshadow::Shape<4> s = batch_data_.shape_; printf("MNISTIterator: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", - (unsigned)img_.size(0), shuffle_, s[0], s[1], s[2], s[3]); + (unsigned)img_.size(0), param.shuffle, s[0], s[1], s[2], s[3]); } } virtual void BeforeFirst(void) { this->loc_ = 0; } virtual bool Next(void) { - if (loc_ + batch_size_ <= img_.size(0)) { + if (loc_ + param.batch_size <= img_.size(0)) { batch_data_.dptr_ = img_[loc_].dptr_; batch_label_.dptr_ = &labels_[loc_]; out_.data[0] = TBlob(batch_data_); out_.data[1] = TBlob(batch_label_); out_.inst_index = &inst_[loc_]; - loc_ += batch_size_; + loc_ += param.batch_size; return true; } else { return false; @@ -83,10 +100,13 @@ class MNISTIterator: public IIterator { virtual const DataBatch &Value(void) const { return out_; } - + virtual void InitParams(const std::vector >& kwargs) { + std::map kmap(kwargs.begin(), kwargs.end()); + param.Init(kmap); + } private: inline void LoadImage(void) { - dmlc::Stream *stdimg = dmlc::Stream::Create(path_img.c_str(), "r"); + dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); ReadInt(stdimg); int image_count = ReadInt(stdimg); int image_rows = ReadInt(stdimg); @@ -111,7 +131,7 @@ class MNISTIterator: public IIterator { delete stdimg; } inline void LoadLabel(void) { - dmlc::Stream *stdlabel = dmlc::Stream::Create(path_label.c_str(), "r"); + dmlc::Stream *stdlabel = dmlc::Stream::Create(param.path_label.c_str(), "r"); ReadInt(stdlabel); int labels_count = 
ReadInt(stdlabel); labels_.resize(labels_count); @@ -146,20 +166,12 @@ class MNISTIterator: public IIterator { } private: - /*! \brief silent */ - int silent_; - /*! \brief path */ - std::string path_img, path_label; + /*! \brief MNIST iter params */ + MNISTParam param; /*! \brief output */ DataBatch out_; - /*! \brief whether do shuffle */ - int shuffle_; - /*! \brief data mode */ - int mode_; /*! \brief current location */ index_t loc_; - /*! \brief batch size */ - index_t batch_size_; /*! \brief image content */ mshadow::Tensor img_; /*! \brief label content */ @@ -177,5 +189,6 @@ class MNISTIterator: public IIterator { // magic number to setup randomness static const int kRandMagic = 0; }; // class MNISTIterator +} // namespace io } // namespace mxnet #endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc new file mode 100644 index 000000000000..942398749378 --- /dev/null +++ b/src/io/iter_mnist.cc @@ -0,0 +1,17 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file iter_mnist.cc + * \brief register mnist iterator + * \author Tianjun Xiao +*/ +#include +#include "./iter_mnist-inl.h" + +namespace mxnet { +namespace io { + +DMLC_REGISTER_PARAMETER(MNISTParam); +REGISTER_IO_ITER(mnist, MNISTIterator); + +} // namespace io +} // namespace mxnet From 7f36bbb93805215b947a1ee89e7f4291f616b6a8 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 23:21:49 +0800 Subject: [PATCH 10/61] modify to dmlc registry --- include/mxnet/io.h | 30 ++++++++++++++++++++++++++++++ python/mxnet/io.py | 25 ++++++++++++++++++++++--- src/io/io.cc | 5 +++++ src/io/iter_mnist-inl.h | 20 +++++++++++++------- src/io/iter_mnist.cc | 5 +++-- 5 files changed, 73 insertions(+), 12 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 16c86138abe1..600978023b5b 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -6,6 +6,7 @@ #ifndef MXNET_IO_H_ #define MXNET_IO_H_ #include +#include #include #include #include @@ -106,5 
+107,34 @@ IIterator *CreateIteratorFromConfig(const char* cfg_path); * \return the data IIterator ptr */ IIterator *CreateIteratorByName(const char* iter_name); + +/*! \brief typedef the factory function of data iterator */ +typedef IIterator *(*DataIteratorFactory)(); +/*! + * \brief Registry entry for DataIterator factory functions. + */ +struct DataIteratorReg + : public dmlc::FunctionRegEntryBase { +}; +//-------------------------------------------------------------- +// The following part are API Registration of Iterators +//-------------------------------------------------------------- +/*! + * \brief Macro to register Iterators + * + * \code + * // example of registering a mnist iterator + * REGISTER_IO_ITERATOR(MNIST, MNISTIterator) + * .describe("Mnist data iterator"); + * + * \endcode + */ +#define MXNET_REGISTER_IO_ITER(name, DataIteratorType) \ + static ::mxnet::IIterator* __create__ ## DataIteratorType ## __() { \ + return new DataIteratorType; \ + } \ + DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ + .set_body(__create__ ## DataIteratorType ## __) } // namespace mxnet #endif // MXNET_IO_H_ diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 96e4938a79b3..ead49f07c4fd 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -5,16 +5,35 @@ import ctypes from .base import _LIB +from .base import c_array, c_str, mx_uint, string_types from .base import DataIterHandle, NArrayHandle from .base import check_call from .narray import NArray class DataIter(object): - """DataIter object in mxnet + """DataIter object in mxnet. List all the needed functions here. 
""" - DataIter is a wrapper for C++ DataIter functions - """ + def __init__(self, handle): + """Initialize with handle + Parameters + ---------- + handle : DataIterHandle + the handle to the underlying C++ Data Iterator + """ + self.handle = handle + + def __del__(self): + check_call(_LIB.MXDataIterFree(self.handle)) + + + + + + + + + def __init__(self): """initialize a new dataiter diff --git a/src/io/io.cc b/src/io/io.cc index 60c013a812a5..aafe85073a52 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,10 @@ #include "iter_mnist-inl.h" #include "../utils/random.h" +namespace dmlc { +DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); +} // namespace dmlc + namespace mxnet { IIterator *CreateIterator( const std::vector< std::pair > &cfg) { diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 62168f8f1811..6a705c483e5b 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -30,14 +30,20 @@ struct MNISTParam : public dmlc::Parameter { /*! 
\brief data mode */ int input_flat; // declare parameters in header file - DMLC_DECLARE_PARAMETER(Param) { - DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte"); - DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte"); - DMLC_DECLARE_FIELD(shuffle).set_default(false); - DMLC_DECLARE_FIELD(silent).set_default(false); - DMLC_DECLARE_FIELD(batch_size).set_default(128); + DMLC_DECLARE_PARAMETER(MNISTParam) { + DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") + .describe("Mnist image path."); + DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") + .describe("Mnist label path."); + DMLC_DECLARE_FIELD(shuffle).set_default(false) + .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to print out data info."); + DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) + .describe("Batch Size."); DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) - .add_enum("noflat", 0).set_default(1); + .add_enum("noflat", 0).set_default(1) + .describe("Whether to flat the data into 1D."); } }; diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 942398749378..c6fab8d376d7 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,14 +4,15 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include #include "./iter_mnist-inl.h" namespace mxnet { namespace io { DMLC_REGISTER_PARAMETER(MNISTParam); -REGISTER_IO_ITER(mnist, MNISTIterator); +MXNET_REGISTER_IO_ITER(MNIST, MNISTIterator) + .describe("Create MNISTIterator") + .add_arguments(MNISTParam::__FIELDS__()); } // namespace io } // namespace mxnet From 299e1d82b636976faab503edc4072506a7339386 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 03:20:11 +0800 Subject: [PATCH 11/61] pass python mnist test, begin cleaning --- include/mxnet/c_api.h | 110 +++++++++++--------------- include/mxnet/io.h | 4 +- python/mxnet/__init__.py | 1 + python/mxnet/io.py | 
166 ++++++++++++++++++++++++++++----------- python/test_io.py | 27 +++---- python/test_mnist.py | 79 ++++++------------- src/c_api.cc | 82 +++++++++++++++++++ src/io/iter_mnist-inl.h | 3 +- src/io/iter_mnist.cc | 2 +- 9 files changed, 293 insertions(+), 181 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 88a2934bbbdf..631e0c032852 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -454,64 +454,82 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, // Part 5: IO Interface //-------------------------------------------- /*! - * \brief create an data iterator from configs string - * \param cfg config string that contains the - * configuration about the iterator - * \param out the handle to the iterator + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateFromConfig(const char *cfg, - DataIterHandle *out); +MXNET_DLL int MXListDataIters(mx_uint *out_size, + DataIterCreator **out_array); /*! - * \brief create an data iterator by name - * \param iter_name iterator name - * \param out the handle to the iterator + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateByName(const char *iter_name, - DataIterHandle *out); +MXNET_DLL int MXDataIterGetName(DataIterCreator iter, + const char **out_name); /*! 
- * \brief set parameter value - * \param handle the handle to iterator - * \param name parameter name - * \param val parameter value + * \brief init an iterator, init with parameters + * the array size of passed in arguments + * \param handle of the iterator creator + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out resulting iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOSetParam(DataIterHandle handle, - const char *name, const char *val); +MXNET_DLL int MXDataIterCreateIter(DataIterCreator handle, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); /*! - * \brief Init after set parameter - * \param handle the handle to iterator + * \brief Get the detailed information about data iterator. + * \param creator the DataIterCreator. + * \param name The returned name of the creator. + * \param description The returned description of the symbol. + * \param num_args Number of arguments. + * \param arg_names Name of the arguments. + * \param arg_type_infos Type informations about the arguments. + * \param arg_descriptions Description information about the arguments. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOInit(DataIterHandle handle); +MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions); +/*! + * \brief free the handle to the IO module + * \param handle the handle pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! 
* \brief move iterator to next position * \param handle the handle to iterator * \param out return value of next * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIONext(DataIterHandle handle, +MXNET_DLL int MXDataIterNext(DataIterHandle handle, int *out); /*! * \brief call iterator.BeforeFirst * \param handle the handle to iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOBeforeFirst(DataIterHandle handle); -/*! - * \brief free the handle to the IO module - * \param handle the handle pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOFree(DataIterHandle handle); +MXNET_DLL int MXDataIterBeforeFirst(DataIterHandle handle); + /*! * \brief get the handle to the NArray of underlying data * \param handle the handle pointer to the data iterator * \param out handle to underlying data NArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOGetData(DataIterHandle handle, +MXNET_DLL int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out); /*! * \brief get the handle to the NArray of underlying label @@ -519,39 +537,7 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, * \param out the handle to underlying label NArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOGetLabel(DataIterHandle handle, +MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); -/*! - * \brief list all the available iterator entries - * \param out_size the size of returned iterators - * \param out_array the output iteratos entries - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array); -/*! 
- * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOIterGetName(DataIterCreator iter, - const char **out_name); -/*! - * \brief create an iterator, init with parameters - * the array size of passed in arguments - * \param creator IOIterator Enrty - * \param num_param number of parameter - * \param keys parameter keys - * \param vals parameter values - * \param out pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - const char **vals, - DataIterHandle *out); - #endif // MXNET_C_API_H_ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 600978023b5b..4ca5ed05fd18 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -27,10 +27,10 @@ class IIterator : public dmlc::DataIter { */ virtual void SetParam(const char *name, const char *val) = 0; /*! - * \brief init the parameter + * \brief set the parameters and init iter * \param kwargs key-value pairs */ - virtual void InitParams(const std::vector >& kwargs) = 0; + virtual void SetInit(const std::vector >& kwargs) = 0; /*! \brief initalize the iterator so that we can use the iterator */ virtual void Init(void) = 0; /*! \brief set before first of the item */ diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 77748dd1950c..75cfd2d88675 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -12,6 +12,7 @@ from .context import Context, current_context from . import narray from . import symbol +from . 
import io __version__ = "0.1.0" diff --git a/python/mxnet/io.py b/python/mxnet/io.py index ead49f07c4fd..baee99a02d61 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -4,6 +4,7 @@ from __future__ import absolute_import import ctypes +import sys from .base import _LIB from .base import c_array, c_str, mx_uint, string_types from .base import DataIterHandle, NArrayHandle @@ -25,65 +26,42 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) + + def __call__(self, *args, **kwargs): + """Invoke iterator as function on inputs. Init params. - - - - - - - - - def __init__(self): - """initialize a new dataiter - - """ - self._datahandle = None - - def createfromcfg(self, cfg_path): - """create a dataiter from config file - - cfg_path is the path of configure file - """ - hdl = DataIterHandle() - check_call(_LIB.MXIOCreateFromConfig(ctypes.c_char_p(cfg_path), ctypes.byref(hdl))) - self._datahandle = hdl - - def createbyname(self, iter_name): - """create a dataiter by the name - - iter_name can be mnist imgrec or so on - """ - hdl = DataIterHandle() - check_call(_LIB.MXIOCreateByName(ctypes.c_char_p(iter_name), ctypes.byref(hdl))) - self._datahandle = hdl - - def setparam(self, name, val): - """set param value for dataiter - - name prameter name - val parameter value - """ - check_call(_LIB.MXIOSetParam(self._datahandle, ctypes.c_char_p(name), ctypes.c_char_p(val))) - - def init(self): - """init dataiter - + Parameters + --------- + args: + provide positional arguments, should not be given. 
+ + kwargs: + provide keyword arguments + Returns + ------- + the inited iterator """ - check_call(_LIB.MXIOInit(self._datahandle)) + if len(args) != 0: + raise TypeError('data iterator only accept \ + keyword arguments') + num_args = len(kwargs) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) + vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) + check_call(_LIB.MXDataIterSetInit( \ + self.handle, num_args, keys, vals)) def beforefirst(self): """set loc to 0 """ - check_call(_LIB.MXIOBeforeFirst(self._datahandle)) + check_call(_LIB.MXDataIterBeforeFirst(self.handle)) def next(self): """init dataiter """ next_res = ctypes.c_int(0) - check_call(_LIB.MXIONext(self._datahandle, ctypes.byref(next_res))) + check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) return next_res.value def getdata(self): @@ -91,7 +69,7 @@ def getdata(self): """ hdl = NArrayHandle() - check_call(_LIB.MXIOGetData(self._datahandle, ctypes.byref(hdl))) + check_call(_LIB.MXDataIterGetData(self.handle, ctypes.byref(hdl))) return NArray(hdl) def getlabel(self): @@ -99,5 +77,97 @@ def getlabel(self): """ hdl = NArrayHandle() - check_call(_LIB.MXIOGetLabel(self._datahandle, ctypes.byref(hdl))) + check_call(_LIB.MXDataIterGetLabel(self.handle, ctypes.byref(hdl))) return NArray(hdl) + +def _make_io_iterator(handle): + """Create an io iterator by handle.""" + name = ctypes.c_char_p() + desc = ctypes.c_char_p() + num_args = mx_uint() + arg_names = ctypes.POINTER(ctypes.c_char_p)() + arg_types = ctypes.POINTER(ctypes.c_char_p)() + arg_descs = ctypes.POINTER(ctypes.c_char_p)() + + check_call(_LIB.MXDataIterGetIterInfo( \ + handle, ctypes.byref(name), ctypes.byref(desc), \ + ctypes.byref(num_args), \ + ctypes.byref(arg_names), \ + ctypes.byref(arg_types), \ + ctypes.byref(arg_descs))) + iter_name = name.value + param_str = [] + for i in range(num_args.value): + ret = '%s : %s' % (arg_names[i], arg_types[i]) + if len(arg_descs[i]) != 0: + ret += 
'\n ' + arg_descs[i] + param_str.append(ret) + + doc_str = ('%s\n\n' + + 'Parameters\n' + + '----------\n' + + '%s\n' + + 'name : string, required.\n' + + ' Name of the resulting data iterator.\n\n' + + 'Returns\n' + + '-------\n' + + 'iterator: Iterator\n'+ + ' The result iterator.') + doc_str = doc_str % (desc.value, '\n'.join(param_str)) + + def creator(*args, **kwargs): + """Create an iterator. + The parameters listed below can be passed in as keyword arguments. + + Parameters + ---------- + name : string, required. + Name of the resulting data iterator. + + Returns + ------- + symbol: Symbol + the resulting symbol + """ + param_keys = [] + param_vals = [] + symbol_kwargs = {} + name = kwargs.pop('name', None) + + for k, v in kwargs.items(): + param_keys.append(c_str(k)) + param_vals.append(c_str(str(v))) + # create atomic symbol + param_keys = c_array(ctypes.c_char_p, param_keys) + param_vals = c_array(ctypes.c_char_p, param_vals) + iter_handle = DataIterHandle() + check_call(_LIB.MXDataIterCreateIter( + handle, len(param_keys), + param_keys, param_vals, + ctypes.byref(iter_handle))) + + if len(args): + raise TypeError('%s can only accept keyword arguments' % iter_name) + + return DataIter(iter_handle) + + creator.__name__ = iter_name + creator.__doc__ = doc_str + return creator + + +def _init_io_module(): + """List and add all the data iterators to current module.""" + plist = ctypes.POINTER(ctypes.c_void_p)() + size = ctypes.c_uint() + + check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) + + module_obj = sys.modules[__name__] + for i in range(size.value): + hdl = ctypes.c_void_p(plist[i]) + dataiter = _make_io_iterator(hdl) + setattr(module_obj, dataiter.__name__, dataiter) + +# Initialize the io in startups +_init_io_module() diff --git a/python/test_io.py b/python/test_io.py index 6909176d11c2..d15d4cc32fcd 100644 --- a/python/test_io.py +++ b/python/test_io.py @@ -1,22 +1,21 @@ #pylint: skip-file import mxnet as mx +import numpy as 
np +import os -dataiter = mx.io.DataIter() -#a.createfromcfg('/home/tianjun/mxnet/mxnet/MNIST.conf') -dataiter.createbyname('mnist') -dataiter.setparam('path_img', "/home/tianjun/data/mnist/train-images-idx3-ubyte") -dataiter.setparam('path_label', "/home/tianjun/data/mnist/train-labels-idx1-ubyte") -dataiter.setparam('shuffle', '1') -dataiter.setparam('seed_data', '2') -dataiter.setparam('batch_size', '100') - -dataiter.init() +dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") dataiter.beforefirst() -for i in range(100): - dataiter.next() - info = "Batch %d" % (i) +idx = 0 +while dataiter.next(): + info = "Batch %d" % (idx) + idx += 1 print info - label = dataiter.getdata() + ''' + label = dataiter.getlabel() print label.numpy + ''' + diff --git a/python/test_mnist.py b/python/test_mnist.py index 3a3ee85a8d3f..fa0f29a60033 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -14,7 +14,7 @@ def Softmax(x): def CalAcc(out, label): pred = np.argmax(out, axis=1) - return np.sum(pred == label) * 1.0 / out.shape[0] + return np.sum(pred == label.transpose()) * 1.0 / out.shape[0] def SetGradient(out_grad, label): assert(out_grad.shape[0] == label.shape[0]) @@ -22,45 +22,6 @@ def SetGradient(out_grad, label): k = label[i] out_grad[i][k] -= 1.0 -# load data -class MNISTIter(object): - def __init__(self, which_set, batch_size=100): - if not os.path.exists('mnist.pkl.gz'): - os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") - f = gzip.open('mnist.pkl.gz', 'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - if which_set == 'train': - self.data = train_set[0] - self.label = np.asarray(train_set[1]) - elif which_set == 'valid': - self.data = valid_set[0] - self.label = np.asarray(valid_set[1]) - else: - self.data = test_set[0] - self.data = np.asarray(test_set[1]) - 
self.batch_size = batch_size - self.nbatch = self.data.shape[0] / batch_size - assert(self.data.shape[0] % batch_size == 0) # I am lazy - self.now_idx = -1 - def BeforeFirst(self): - self.now_idx = -1 - def Next(self): - self.now_idx += 1 - if self.now_idx == self.nbatch: - return False - return True - def Get(self): - if self.now_idx < 0: - raise Exception("Iterator is at head") - elif self.now_idx >= self.nbatch: - raise Exception("Iterator is at end") - start = self.now_idx * self.batch_size - end = (self.now_idx + 1) * self.batch_size - return (self.data[start:end, :], self.label[start:end]) - - - # symbol net batch_size = 100 data = mx.symbol.Variable('data') @@ -69,7 +30,7 @@ def Get(self): fc2 = mx.symbol.FullyConnected(data = act1, name='fc2', num_hidden=10) args_list = fc2.list_arguments() # infer shape -data_shape = (batch_size, 784) +data_shape = (batch_size, 1, 1, 784) arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] @@ -104,20 +65,30 @@ def Update(mom, grad, weight): block = zip(mom_narrays, grad_narrays, arg_narrays) -train = MNISTIter("train", batch_size) -valid = MNISTIter("valid", batch_size) +train_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") +train_dataiter.beforefirst() +val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") +val_dataiter.beforefirst() for i in xrange(epoch): # train print "Epoch %d" % i train_acc = 0.0 val_acc = 0.0 - while train.Next(): - data, label = train.Get() - inputs["data"].numpy[:] = data + train_nbatch = 0 + val_nbatch = 0 + while train_dataiter.next(): + 
data = train_dataiter.getdata() + label = train_dataiter.getlabel().numpy.astype(np.int32) + inputs["data"].numpy[:] = data.numpy executor.forward() out_narray.numpy[:] = Softmax(out_narray.numpy) train_acc += CalAcc(out_narray.numpy, label) + train_nbatch += 1 grad_narray.numpy[:] = out_narray.numpy SetGradient(grad_narray.numpy, label) executor.backward([grad_narray]) @@ -126,15 +97,17 @@ def Update(mom, grad, weight): Update(mom, grad, weight) # evaluate - while valid.Next(): - data, label = valid.Get() - inputs["data"].numpy[:] = data + while val_dataiter.next(): + data = val_dataiter.getdata() + label = val_dataiter.getlabel().numpy.astype(np.int32) + inputs["data"].numpy[:] = data.numpy executor.forward() val_acc += CalAcc(out_narray.numpy, label) - print "Train Acc: ", train_acc / train.nbatch - print "Valid Acc: ", val_acc / valid.nbatch - train.BeforeFirst() - valid.BeforeFirst() + val_nbatch += 1 + print "Train Acc: ", train_acc / train_nbatch + print "Valid Acc: ", val_acc / val_nbatch + train_dataiter.beforefirst() + val_dataiter.beforefirst() diff --git a/src/c_api.cc b/src/c_api.cc index f5f2d12ff66d..5965b6a4fab2 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -611,3 +611,85 @@ int MXExecutorBind(SymbolHandle symbol_handle, *out = Executor::Bind(*symb, ctx, in_args_vec, arg_grad_vec, grad_req_vec); API_END(); } + +//-------------------------------------------- +// Part 5: IO Interface +//-------------------------------------------- +int MXListDataIters(mx_uint *out_size, + DataIterCreator **out_array) { + API_BEGIN(); + auto &vec = dmlc::Registry::List(); + *out_size = static_cast(vec.size()); + *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) + API_END(); +} + +int MXDataIterGetName(DataIterCreator iter, + const char **out_name) { + API_BEGIN(); + auto *f = static_cast(iter); + *out_name = f->name.c_str(); + API_END(); +} + +int MXDataIterGetIterInfo(DataIterCreator creator, + const char **name, + const char **description, + 
mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions) { + DataIteratorReg *e = static_cast(creator); + return MXAPIGetFunctionRegInfo(e, name, description, num_args, + arg_names, arg_type_infos, arg_descriptions); +} + +int MXDataIterCreateIter(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out) { + IIterator *iter = nullptr; + API_BEGIN(); + DataIteratorReg *e = static_cast(creator); + iter = e->body(); + std::vector > kwargs; + for (int i = 0; i < num_param; ++i) { + kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); + } + iter->SetInit(kwargs); + *out = iter; + API_END_HANDLE_ERROR(delete iter); +} + +int MXDataIterFree(DataIterHandle handle) { + API_BEGIN(); + delete static_cast *>(handle); + API_END(); +} + +int MXDataIterBeforeFirst(DataIterHandle handle) { + API_BEGIN(); + static_cast* >(handle)->BeforeFirst(); + API_END(); +} + +int MXDataIterNext(DataIterHandle handle, int *out) { + API_BEGIN(); + *out = static_cast* >(handle)->Next(); + API_END(); +} + +int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out) { + API_BEGIN(); + DataBatch db = static_cast* >(handle)->Value(); + *out = new NArray(db.data[1], 0); + API_END(); +} + +int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out) { + API_BEGIN(); + DataBatch db = static_cast* >(handle)->Value(); + *out = new NArray(db.data[0], 0); + API_END(); +} diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 6a705c483e5b..88a3e4d82acd 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -106,9 +106,10 @@ class MNISTIterator: public IIterator { virtual const DataBatch &Value(void) const { return out_; } - virtual void InitParams(const std::vector >& kwargs) { + virtual void SetInit(const std::vector >& kwargs) { std::map kmap(kwargs.begin(), kwargs.end()); param.Init(kmap); + this->Init(); } private: inline void LoadImage(void) { diff --git 
a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index c6fab8d376d7..d6119d6c8a69 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -10,7 +10,7 @@ namespace mxnet { namespace io { DMLC_REGISTER_PARAMETER(MNISTParam); -MXNET_REGISTER_IO_ITER(MNIST, MNISTIterator) +MXNET_REGISTER_IO_ITER(MNISTIterator, MNISTIterator) .describe("Create MNISTIterator") .add_arguments(MNISTParam::__FIELDS__()); From 7cf55061980f79f1656b5f344c37472bc404acab Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 04:07:14 +0800 Subject: [PATCH 12/61] clean io interface --- include/mxnet/io.h | 25 --------------------- python/mxnet/io.py | 14 +++++------- python/test_mnist.py | 4 +--- src/common/utils.h | 6 ++++++ src/io/io.cc | 48 ----------------------------------------- src/io/iter_mnist-inl.h | 25 +++++++++++---------- 6 files changed, 24 insertions(+), 98 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 4ca5ed05fd18..ac22919745a1 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -20,12 +20,6 @@ namespace mxnet { template class IIterator : public dmlc::DataIter { public: - /*! - * \brief set the parameter - * \param name name of parameter - * \param val value of parameter - */ - virtual void SetParam(const char *name, const char *val) = 0; /*! * \brief set the parameters and init iter * \param kwargs key-value pairs @@ -89,25 +83,6 @@ struct DataBatch { void Naming(std::vector names); }; // struct DataBatch -/*! - * \brief create the databatch iterator IIterator - * \param cfg configure settings key=vale pair - * \return the data IIterator ptr - */ -IIterator *CreateIterator(const std::vector > &cfg); -/*! - * \brief create the databatch iterator IIterator from config file - * \param cfg_path configure file path - * \return the data IIterator ptr - */ -IIterator *CreateIteratorFromConfig(const char* cfg_path); -/*! 
- * \brief create the databatch iterator IIterator by iter name - * \param iter_name can be mnist, imgrec and so on - * \return the data IIterator ptr - */ -IIterator *CreateIteratorByName(const char* iter_name); - /*! \brief typedef the factory function of data iterator */ typedef IIterator *(*DataIteratorFactory)(); /*! diff --git a/python/mxnet/io.py b/python/mxnet/io.py index baee99a02d61..dba36bd2114c 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -6,7 +6,7 @@ import ctypes import sys from .base import _LIB -from .base import c_array, c_str, mx_uint, string_types +from .base import c_array, c_str, mx_uint from .base import DataIterHandle, NArrayHandle from .base import check_call from .narray import NArray @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. @@ -43,9 +43,9 @@ def __call__(self, *args, **kwargs): """ if len(args) != 0: raise TypeError('data iterator only accept \ - keyword arguments') + keyword arguments') num_args = len(kwargs) - keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) check_call(_LIB.MXDataIterSetInit( \ self.handle, num_args, keys, vals)) @@ -131,8 +131,6 @@ def creator(*args, **kwargs): """ param_keys = [] param_vals = [] - symbol_kwargs = {} - name = kwargs.pop('name', None) for k, v in kwargs.items(): param_keys.append(c_str(k)) @@ -160,9 +158,7 @@ def _init_io_module(): """List and add all the data iterators to current module.""" plist = ctypes.POINTER(ctypes.c_void_p)() size = ctypes.c_uint() - - check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) - + check_call(_LIB.MXListDataIters(ctypes.byref(size), ctypes.byref(plist))) module_obj = sys.modules[__name__] for i in 
range(size.value): hdl = ctypes.c_void_p(plist[i]) diff --git a/python/test_mnist.py b/python/test_mnist.py index fa0f29a60033..8c3e09ba3705 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -67,7 +67,7 @@ def Update(mom, grad, weight): train_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat") + batch_size=100, shuffle=1, silent=1, input_flat="flat", seed_data=1) train_dataiter.beforefirst() val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", @@ -109,5 +109,3 @@ def Update(mom, grad, weight): train_dataiter.beforefirst() val_dataiter.beforefirst() - - diff --git a/src/common/utils.h b/src/common/utils.h index f55ebc26535f..cf1fd2f1bb36 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -10,12 +10,18 @@ #include #include #include +#include #endif // DMLC_USE_CXX11 namespace common { #if DMLC_USE_CXX11 +/*! + * \brief Random Engine + */ +typedef std::mt19937 RANDOM_ENGINE; + /*! * \brief Helper functions. 
*/ diff --git a/src/io/io.cc b/src/io/io.cc index aafe85073a52..fb7a8c2d3092 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -11,55 +11,7 @@ #include #include #include "iter_mnist-inl.h" -#include "../utils/random.h" namespace dmlc { DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); } // namespace dmlc - -namespace mxnet { - IIterator *CreateIterator( - const std::vector< std::pair > &cfg) { - size_t i = 0; - IIterator *it = NULL; - for (; i < cfg.size(); ++i) { - const char *name = cfg[i].first.c_str(); - const char *val = cfg[i].second.c_str(); - if (!strcmp(name, "iter")) { - if (!strcmp(val, "mnist")) { - CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new io::MNISTIterator(); continue; - } - CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; - } - if (it != NULL) { - it->SetParam(name, val); - } - } - CHECK(it != NULL) << "must specify iterator by iter=itername"; - return it; - } - - IIterator *CreateIteratorFromConfig(const char* cfg_path) { - std::ifstream ifs(cfg_path, std::ifstream::in); - std::vector< std::pair< std::string, std::string> > itcfg; - dmlc::Config cfg(ifs); - for (dmlc::Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { - dmlc::Config::ConfigEntry ent = *iter; - itcfg.push_back(std::make_pair(ent.first, ent.second)); - } - // Get the data and init - return CreateIterator(itcfg); - } - - IIterator *CreateIteratorByName(const char* iter_name) { - IIterator *it = NULL; - // Currently only support mnist - if (!strcmp(iter_name, "mnist")) { - CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new io::MNISTIterator(); - } - CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; - return it; - } -} // namespace mxnet diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 88a3e4d82acd..ef2348488396 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -13,7 +13,9 @@ #include #include #include -#include "../utils/random.h" 
+#include +#include +#include "../common/utils.h" namespace mxnet { namespace io { @@ -29,6 +31,8 @@ struct MNISTParam : public dmlc::Parameter { int batch_size; /*! \brief data mode */ int input_flat; + /*! \brief random seed */ + int seed_data; // declare parameters in header file DMLC_DECLARE_PARAMETER(MNISTParam) { DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") @@ -36,33 +40,29 @@ struct MNISTParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") .describe("Mnist label path."); DMLC_DECLARE_FIELD(shuffle).set_default(false) - .describe("Whether to shuffle data."); + .describe("Whether to shuffle data."); DMLC_DECLARE_FIELD(silent).set_default(false) - .describe("Whether to print out data info."); + .describe("Whether to print out data info."); DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) .describe("Batch Size."); DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) .add_enum("noflat", 0).set_default(1) .describe("Whether to flat the data into 1D."); + DMLC_DECLARE_FIELD(seed_data).set_default(0) + .describe("Random Seed."); } }; - + class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; inst_offset_ = 0; - rnd.Seed(kRandMagic); out_.data.resize(2); } virtual ~MNISTIterator(void) { if (img_.dptr_ != NULL) delete []img_.dptr_; } - virtual void SetParam(const char *name, const char *val) { - std::map kwargs; - kwargs[name] = val; - param.Init(kwargs); - } // intialize iterator loads data in virtual void Init(void) { this->LoadImage(); @@ -111,6 +111,7 @@ class MNISTIterator: public IIterator { param.Init(kmap); this->Init(); } + private: inline void LoadImage(void) { dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); @@ -151,7 +152,7 @@ class MNISTIterator: public IIterator { delete stdlabel; } inline void Shuffle(void) { - rnd.Shuffle(&inst_); + std::shuffle(inst_.begin(), inst_.end(), 
common::RANDOM_ENGINE(kRandMagic+param.seed_data)); std::vector tmplabel(labels_.size()); mshadow::TensorContainer tmpimg(img_.shape_); for (size_t i = 0; i < inst_.size(); ++i) { @@ -191,8 +192,6 @@ class MNISTIterator: public IIterator { unsigned inst_offset_; /*! \brief instance index */ std::vector inst_; - // random sampler - utils::RandomSampler rnd; // magic number to setup randomness static const int kRandMagic = 0; }; // class MNISTIterator From 994bc41c7eeabdbe78b2a6935250ee2134721397 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 04:17:01 +0800 Subject: [PATCH 13/61] modify to pass travis --- python/mxnet/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/mxnet/io.py b/python/mxnet/io.py index dba36bd2114c..906843e5d0c5 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -132,9 +132,9 @@ def creator(*args, **kwargs): param_keys = [] param_vals = [] - for k, v in kwargs.items(): + for k, val in kwargs.items(): param_keys.append(c_str(k)) - param_vals.append(c_str(str(v))) + param_vals.append(c_str(str(val))) # create atomic symbol param_keys = c_array(ctypes.c_char_p, param_keys) param_vals = c_array(ctypes.c_char_p, param_vals) From 3e406ff030bfdc72d1490c2fe6142331d00ddb0d Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:40:01 +0800 Subject: [PATCH 14/61] finish merge remote master --- Makefile | 4 +- include/mxnet/io.h | 105 +++++++++++++++++++++++ src/io/inst_vector.h | 117 ++++++++++++++++++++++++++ src/io/io.cc | 60 +++++++++++++ src/io/iter_mnist-inl.h | 181 ++++++++++++++++++++++++++++++++++++++++ test/io_mnist_test.cc | 96 +++++++++++++++++++++ 6 files changed, 562 insertions(+), 1 deletion(-) create mode 100644 include/mxnet/io.h create mode 100644 src/io/inst_vector.h create mode 100644 src/io/io.cc create mode 100644 src/io/iter_mnist-inl.h create mode 100644 test/io_mnist_test.cc diff --git a/Makefile b/Makefile index 8ebcfa896d62..1fe19f489872 100644 --- a/Makefile +++ 
b/Makefile @@ -64,7 +64,7 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o +OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o io.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -105,11 +105,13 @@ convolution_cpu.o: src/operator/convolution.cc convolution_gpu.o: src/operator/convolution.cu reshape_cpu.o: src/operator/reshape.cc reshape_gpu.o: src/operator/reshape.cu +io.o: src/io/io.cc lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) test/test_storage: test/test_storage.cc lib/libmxnet.a +test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/include/mxnet/io.h b/include/mxnet/io.h new file mode 100644 index 000000000000..29dccbace770 --- /dev/null +++ b/include/mxnet/io.h @@ -0,0 +1,105 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file io.h + * \brief mxnet io data structure and data iterator + */ +#ifndef MXNET_IO_H_ +#define MXNET_IO_H_ +#include +#include +#include +#include +#include "./base.h" + +namespace mxnet { +/*! + * \brief iterator type + * \tparam DType data type + */ +template +class IIterator : public dmlc::DataIter { + public: + /*! + * \brief set the parameter + * \param name name of parameter + * \param val value of parameter + */ + virtual void SetParam(const char *name, const char *val) = 0; + /*! \brief initalize the iterator so that we can use the iterator */ + virtual void Init(void) = 0; + /*! 
\brief set before first of the item */ + virtual void BeforeFirst(void) = 0; + /*! \brief move to next item */ + virtual bool Next(void) = 0; + /*! \brief get current data */ + virtual const DType &Value(void) const = 0; + /*! \brief constructor */ + virtual ~IIterator(void) {} + /*! \brief store the name of each data, it could be used for making NArrays */ + std::vector data_names; + /*! \brief set data name to each attribute of data */ + inline void SetDataName(const std::string data_name){ + data_names.push_back(data_name); + } +}; // class IIterator + +/*! \brief a single data instance */ +struct DataInst { + /*! \brief unique id for instance */ + unsigned index; + /*! \brief content of data */ + std::vector data; + /*! \brief extra data to be fed to the network */ + std::string extra_data; +}; // struct DataInst + +/*! + * \brief a standard batch of data commonly used by iterator + * a databatch contains multiple TBlobs. Each Tblobs has + * a name stored in a map. There's no different between + * data and label, how we use them is to see the DNN implementation. + */ +struct DataBatch { + public: + /*! \brief unique id for instance, can be NULL, sometimes is useful */ + unsigned *inst_index; + /*! \brief number of instance */ + mshadow::index_t batch_size; + /*! \brief number of padding elements in this batch, + this is used to indicate the last elements in the batch are only padded up to match the batch, and should be discarded */ + mshadow::index_t num_batch_padd; + public: + /*! \brief content of dense data, if this DataBatch is dense */ + std::vector data; + /*! \brief extra data to be fed to the network */ + std::string extra_data; + public: + /*! \brief constructor */ + DataBatch(void) { + inst_index = NULL; + batch_size = 0; num_batch_padd = 0; + } + /*! \brief giving name to the data */ + void Naming(std::vector names); +}; // struct DataBatch + +/*! 
+ * \brief create the databatch iterator IIterator + * \param cfg configure settings key=vale pair + * \return the data IIterator ptr + */ +IIterator *CreateIterator(const std::vector > &cfg); +/*! + * \brief create the databatch iterator IIterator from config file + * \param cfg_path configure file path + * \return the data IIterator ptr + */ +IIterator *CreateIteratorFromConfig(const char* cfg_path); +/*! + * \brief create the databatch iterator IIterator by iter name + * \param iter_name can be mnist, imgrec and so on + * \return the data IIterator ptr + */ +IIterator *CreateIteratorByName(const char* iter_name); +} // namespace mxnet +#endif // MXNET_IO_H_ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h new file mode 100644 index 000000000000..1ae734631680 --- /dev/null +++ b/src/io/inst_vector.h @@ -0,0 +1,117 @@ +/*! + * Copyright (c) 2015 by Contributors + * \inst_vector.h + * \brief holder of a sequence of DataInst in CPU + * that are not necessarily of same shape + */ +#ifndef MXNET_IO_INST_VECTOR_H_ +#define MXNET_IO_INST_VECTOR_H_ +#include +#include +#include +#include +#include "./data.h" +namespace mxnet { +/*! 
+ * \brief tensor vector that can store sequence of tensor + * in a memory compact way, tensors do not have to be of same shape + */ +template +class TensorVector { + public: + TensorVector(void) { + this->Clear(); + } + // get i-th tensor + inline mshadow::Tensor + operator[](size_t i) const { + CHECK(i + 1 < offset_.size()); + CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); + return mshadow::Tensor + (reinterpret_cast(BeginPtr(content_)) + offset_[i], shape_[i]); + } + inline mshadow::Tensor Back() const { + return (*this)[Size() - 1]; + } + inline size_t Size(void) const { + return shape_.size(); + } + // push a tensor of certain shape + // return the reference of the pushed tensor + inline void Push(mshadow::Shape shape) { + shape_.push_back(shape); + offset_.push_back(offset_.back() + shape.Size()); + content_.resize(offset_.back()); + } + inline void Clear(void) { + offset_.clear(); + offset_.push_back(0); + content_.clear(); + shape_.clear(); + } + + private: + // offset of the data content + std::vector offset_; + // data content + std::vector content_; + // shape of data + std::vector > shape_; +}; + +/*! + * \brief tblob vector that can store sequence of tblob + * in a memory compact way, tblobs do not have to be of same shape + */ +template +class TBlobVector { + public: + TBlobVector(void) { + this->Clear(); + } + // get i-th tblob + inline TBlob operator[](size_t i) const; + // get the last tblob + inline TBlob Back(); + // return the size of the vector + inline size_t Size(void) const; + // push a tensor of certain shape + // return the reference of the pushed tensor + inline void Push(TShape shape_); + inline void Clear(void); + private: + // offset of the data content + std::vector offset_; + // data content + std::vector content_; + // shape of data + std::vector shape_; +}; + +/*! 
+ * \brief instance vector that can holds + * non-uniform shape data instance in a shape efficient way + */ +class InstVector { + public: + inline size_t Size(void) const { + return index_.size(); + } + // instance + inline DataInst operator[](size_t i) const; + // get back of instance vector + inline DataInst Back() const; + // clear the container + inline void Clear(void); + // push the newly coming instance + inline void Push(unsigned index, TBlob data_); + + private: + /*! \brief index of the data */ + std::vector index_; + // data + std::vector > data_; + // extra data + std::vector extra_data_; +}; +#endif // MXNET_IO_INST_VECTOR_H_ diff --git a/src/io/io.cc b/src/io/io.cc new file mode 100644 index 000000000000..2df16e4fc209 --- /dev/null +++ b/src/io/io.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2015 by Contributors +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE + +#include +#include +#include +#include +#include +#include +#include +#include "iter_mnist-inl.h" +#include "../utils/random.h" + +namespace mxnet { + IIterator *CreateIterator( + const std::vector< std::pair > &cfg) { + size_t i = 0; + IIterator *it = NULL; + for (; i < cfg.size(); ++i) { + const char *name = cfg[i].first.c_str(); + const char *val = cfg[i].second.c_str(); + if (!strcmp(name, "iter")) { + if (!strcmp(val, "mnist")) { + CHECK(it == NULL) << "mnist cannot chain over other iterator"; + it = new MNISTIterator(); continue; + } + CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; + } + if (it != NULL) { + it->SetParam(name, val); + } + } + CHECK(it != NULL) << "must specify iterator by iter=itername"; + return it; + } + + IIterator *CreateIteratorFromConfig(const char* cfg_path) { + std::ifstream ifs(cfg_path, std::ifstream::in); + std::vector< std::pair< std::string, std::string> > itcfg; + dmlc::Config cfg(ifs); + for (dmlc::Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { + dmlc::Config::ConfigEntry ent = *iter; + 
itcfg.push_back(std::make_pair(ent.first, ent.second)); + } + // Get the data and init + return CreateIterator(itcfg); + } + + IIterator *CreateIteratorByName(const char* iter_name) { + IIterator *it = NULL; + // Currently only support mnist + if (!strcmp(iter_name, "mnist")) { + CHECK(it == NULL) << "mnist cannot chain over other iterator"; + it = new MNISTIterator(); + } + CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; + return it; + } +} // namespace mxnet diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h new file mode 100644 index 000000000000..376838fcf3f0 --- /dev/null +++ b/src/io/iter_mnist-inl.h @@ -0,0 +1,181 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file iter_mnist-inl.h + * \brief iterator that takes mnist dataset + */ +#ifndef MXNET_IO_ITER_MNIST_INL_H_ +#define MXNET_IO_ITER_MNIST_INL_H_ +#include +#include +#include +#include +#include +#include +#include +#include "../utils/random.h" + +namespace mxnet { +class MNISTIterator: public IIterator { + public: + MNISTIterator(void) { + img_.dptr_ = NULL; + mode_ = 1; + inst_offset_ = 0; + silent_ = 0; + shuffle_ = 0; + rnd.Seed(kRandMagic); + out_.data.resize(2); + } + virtual ~MNISTIterator(void) { + if (img_.dptr_ != NULL) delete []img_.dptr_; + } + virtual void SetParam(const char *name, const char *val) { + if (!strcmp(name, "silent")) silent_ = atoi(val); + if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); + if (!strcmp(name, "input_flat")) mode_ = atoi(val); + if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); + if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "path_label")) path_label = val; + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + } + // intialize iterator loads data in + virtual void Init(void) { + this->LoadImage(); + this->LoadLabel(); + // set name + 
this->SetDataName(std::string("data")); + this->SetDataName(std::string("label")); + if (mode_ == 1) { + batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, 1, img_.size(1) * img_.size(2)); + } else { + batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, img_.size(1), img_.size(2)); + } + out_.inst_index = NULL; + batch_label_.shape_ = mshadow::Shape2(batch_size_, 1); + batch_label_.stride_ = 1; + batch_data_.stride_ = batch_data_.size(3); + out_.batch_size = batch_size_; + if (shuffle_) this->Shuffle(); + if (silent_ == 0) { + mshadow::Shape<4> s = batch_data_.shape_; + printf("MNISTIterator: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", + (unsigned)img_.size(0), shuffle_, s[0], s[1], s[2], s[3]); + } + } + virtual void BeforeFirst(void) { + this->loc_ = 0; + } + virtual bool Next(void) { + if (loc_ + batch_size_ <= img_.size(0)) { + batch_data_.dptr_ = img_[loc_].dptr_; + batch_label_.dptr_ = &labels_[loc_]; + out_.data[0] = TBlob(batch_data_); + out_.data[1] = TBlob(batch_label_); + out_.inst_index = &inst_[loc_]; + loc_ += batch_size_; + return true; + } else { + return false; + } + } + virtual const DataBatch &Value(void) const { + return out_; + } + + private: + inline void LoadImage(void) { + dmlc::Stream *stdimg = dmlc::Stream::Create(path_img.c_str(), "r"); + ReadInt(stdimg); + int image_count = ReadInt(stdimg); + int image_rows = ReadInt(stdimg); + int image_cols = ReadInt(stdimg); + + img_.shape_ = mshadow::Shape3(image_count, image_rows, image_cols); + img_.stride_ = img_.size(2); + + // allocate continuous memory + img_.dptr_ = new float[img_.MSize()]; + for (int i = 0; i < image_count; ++i) { + for (int j = 0; j < image_rows; ++j) { + for (int k = 0; k < image_cols; ++k) { + unsigned char ch; + CHECK(stdimg->Read(&ch, sizeof(ch) != 0)); + img_[i][j][k] = ch; + } + } + } + // normalize to 0-1 + img_ *= 1.0f / 256.0f; + delete stdimg; + } + inline void LoadLabel(void) { + dmlc::Stream *stdlabel = dmlc::Stream::Create(path_label.c_str(), "r"); + 
ReadInt(stdlabel); + int labels_count = ReadInt(stdlabel); + labels_.resize(labels_count); + for (int i = 0; i < labels_count; ++i) { + unsigned char ch; + CHECK(stdlabel->Read(&ch, sizeof(ch) != 0)); + labels_[i] = ch; + inst_.push_back((unsigned)i + inst_offset_); + } + delete stdlabel; + } + inline void Shuffle(void) { + rnd.Shuffle(&inst_); + std::vector tmplabel(labels_.size()); + mshadow::TensorContainer tmpimg(img_.shape_); + for (size_t i = 0; i < inst_.size(); ++i) { + unsigned ridx = inst_[i] - inst_offset_; + mshadow::Copy(tmpimg[i], img_[ridx]); + tmplabel[i] = labels_[ridx]; + } + // copy back + mshadow::Copy(img_, tmpimg); + labels_ = tmplabel; + } + + private: + inline static int ReadInt(dmlc::Stream *fi) { + unsigned char buf[4]; + CHECK(fi->Read(buf, sizeof(buf)) == sizeof(buf)) + << "invalid mnist format"; + return reinterpret_cast(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); + } + + private: + /*! \brief silent */ + int silent_; + /*! \brief path */ + std::string path_img, path_label; + /*! \brief output */ + DataBatch out_; + /*! \brief whether do shuffle */ + int shuffle_; + /*! \brief data mode */ + int mode_; + /*! \brief current location */ + index_t loc_; + /*! \brief batch size */ + index_t batch_size_; + /*! \brief image content */ + mshadow::Tensor img_; + /*! \brief label content */ + std::vector labels_; + /*! \brief batch data tensor */ + mshadow::Tensor batch_data_; + /*! \brief batch label tensor */ + mshadow::Tensor batch_label_; + /*! \brief instance index offset */ + unsigned inst_offset_; + /*! 
\brief instance index */ + std::vector inst_; + // random sampler + utils::RandomSampler rnd; + // magic number to setup randomness + static const int kRandMagic = 0; +}; // class MNISTIterator +} // namespace mxnet +#endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/test/io_mnist_test.cc b/test/io_mnist_test.cc new file mode 100644 index 000000000000..2bfba24a507a --- /dev/null +++ b/test/io_mnist_test.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2015 by Contributors +// IO test code + +#include +#include +#include +#include +#include +#include "mxnet/io.h" +#include "../src/io/iter_mnist-inl.h" + +using namespace std; +using namespace mxnet; +using namespace dmlc; + +void InitIter(IIterator* itr, + const std::vector< std::pair< std::string, std::string> > &defcfg) { + for (size_t i = 0; i < defcfg.size(); ++i) { + itr->SetParam(defcfg[i].first.c_str(), defcfg[i].second.c_str()); + } + itr->Init(); +} + +IIterator* CreateIterators( + const std::vector< std::pair< std::string, std::string> >& cfg) { + IIterator* data_itr = NULL; + int flag = 0; + std::string evname; + std::vector< std::pair< std::string, std::string> > itcfg; + std::vector< std::pair< std::string, std::string> > defcfg; + for (size_t i = 0; i < cfg.size(); ++i) { + const char *name = cfg[i].first.c_str(); + const char *val = cfg[i].second.c_str(); + if (!strcmp(name, "data")) { + flag = 1; continue; + } + if (!strcmp(name, "eval")) { + flag = 2; continue; + } + if (!strcmp(name, "pred")) { + flag = 3; continue; + } + if (!strcmp(name, "iterend") && !strcmp(val, "true")) { + if (flag == 1) { + data_itr = mxnet::CreateIterator(itcfg); + } + flag = 0; itcfg.clear(); + } + if (flag == 0) { + defcfg.push_back(cfg[i]); + } else { + itcfg.push_back(cfg[i]); + } + } + if (data_itr != NULL) { + InitIter(data_itr, defcfg); + } + return data_itr; +} + +/*! 
+ * Usage: ./io_mnist_test /path/to/io_config/file + * Example + * data = train + * iter = mnist + * path_img = "./data/mnist/train-images-idx3-ubyte" + * path_label = "./data/mnist/train-labels-idx1-ubyte" + * shuffle = 1 + * iterend = true + * input_shape = 1,1,784 + * batch_size = 100 + * + */ + +int main(int argc, char** argv) { + std::ifstream ifs(argv[1], std::ifstream::in); + std::vector< std::pair< std::string, std::string> > itcfg; + Config cfg(ifs); + for (Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { + Config::ConfigEntry ent = *iter; + itcfg.push_back(std::make_pair(ent.first, ent.second)); + } + // Get the data and init + IIterator* data_itr = CreateIterators(itcfg); + data_itr->BeforeFirst(); + int batch_dir = 0; + while (data_itr->Next()) { + std::cout << "Label of Batch " << batch_dir++ << std::endl; + // print label + DataBatch db = data_itr->Value(); + mshadow::Tensor label = db.data[1].get(); + for (size_t i = 0; i < label.shape_.shape_[0]; i++) + std::cout << label.dptr_[i] << " "; + std::cout << "\n"; + } +} From c81b60bc3061cdc61002e8854016a94dd5d56e8c Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sat, 22 Aug 2015 13:05:57 +0800 Subject: [PATCH 15/61] built in python, start polishing new feature required --- Makefile | 4 +-- include/mxnet/c_api.h | 23 ++++++++++++ python/mxnet/base.py | 2 +- python/mxnet/io.py | 84 +++++++++++++++++++++++++++++++++++++++++++ python/test_io.py | 22 ++++++++++++ src/c_api.cc | 1 + 6 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 python/mxnet/io.py create mode 100644 python/test_io.py diff --git a/Makefile b/Makefile index 1fe19f489872..161b455236e4 100644 --- a/Makefile +++ b/Makefile @@ -107,8 +107,8 @@ reshape_cpu.o: src/operator/reshape.cc reshape_gpu.o: src/operator/reshape.cu io.o: src/io/io.cc -lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) -lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) +lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) +lib/libmxnet.so: 
$(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 5802c32cf75c..f1119b3323c5 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -460,6 +460,29 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, */ MXNET_DLL int MXIOCreateFromConfig(const char *cfg, DataIterHandle *out); +/*! + * \brief create an data iterator by name + * \param iter_name iterator name + * \param out the handle to the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOCreateByName(const char *iter_name, + DataIterHandle *out); +/*! + * \brief set parameter value + * \param handle the handle to iterator + * \param name parameter name + * \param val parameter value + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOSetParam(DataIterHandle handle, + const char *name, const char *val); +/*! + * \brief Init after set parameter + * \param handle the handle to iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOInit(DataIterHandle handle); /*! 
* \brief move iterator to next position * \param handle the handle to iterator diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 6cf8c616f805..744ef46ba9f2 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -75,7 +75,7 @@ def _load_lib(): SymbolCreatorHandle = ctypes.c_void_p SymbolHandle = ctypes.c_void_p ExecutorHandle = ctypes.c_void_p - +DataIterHandle = ctypes.c_void_p #---------------------------- # helper function definition #---------------------------- diff --git a/python/mxnet/io.py b/python/mxnet/io.py new file mode 100644 index 000000000000..96e4938a79b3 --- /dev/null +++ b/python/mxnet/io.py @@ -0,0 +1,84 @@ +# coding: utf-8 + +"""NArray interface of mxnet""" +from __future__ import absolute_import + +import ctypes +from .base import _LIB +from .base import DataIterHandle, NArrayHandle +from .base import check_call +from .narray import NArray + +class DataIter(object): + """DataIter object in mxnet + + DataIter is a wrapper for C++ DataIter functions + """ + + def __init__(self): + """initialize a new dataiter + + """ + self._datahandle = None + + def createfromcfg(self, cfg_path): + """create a dataiter from config file + + cfg_path is the path of configure file + """ + hdl = DataIterHandle() + check_call(_LIB.MXIOCreateFromConfig(ctypes.c_char_p(cfg_path), ctypes.byref(hdl))) + self._datahandle = hdl + + def createbyname(self, iter_name): + """create a dataiter by the name + + iter_name can be mnist imgrec or so on + """ + hdl = DataIterHandle() + check_call(_LIB.MXIOCreateByName(ctypes.c_char_p(iter_name), ctypes.byref(hdl))) + self._datahandle = hdl + + def setparam(self, name, val): + """set param value for dataiter + + name prameter name + val parameter value + """ + check_call(_LIB.MXIOSetParam(self._datahandle, ctypes.c_char_p(name), ctypes.c_char_p(val))) + + def init(self): + """init dataiter + + """ + check_call(_LIB.MXIOInit(self._datahandle)) + + def beforefirst(self): + """set loc to 0 + + """ + 
check_call(_LIB.MXIOBeforeFirst(self._datahandle)) + + def next(self): + """init dataiter + + """ + next_res = ctypes.c_int(0) + check_call(_LIB.MXIONext(self._datahandle, ctypes.byref(next_res))) + return next_res.value + + def getdata(self): + """get data from batch + + """ + hdl = NArrayHandle() + check_call(_LIB.MXIOGetData(self._datahandle, ctypes.byref(hdl))) + return NArray(hdl) + + def getlabel(self): + """get label from batch + + """ + hdl = NArrayHandle() + check_call(_LIB.MXIOGetLabel(self._datahandle, ctypes.byref(hdl))) + return NArray(hdl) diff --git a/python/test_io.py b/python/test_io.py new file mode 100644 index 000000000000..6909176d11c2 --- /dev/null +++ b/python/test_io.py @@ -0,0 +1,22 @@ +#pylint: skip-file +import mxnet as mx + +dataiter = mx.io.DataIter() +#a.createfromcfg('/home/tianjun/mxnet/mxnet/MNIST.conf') +dataiter.createbyname('mnist') +dataiter.setparam('path_img', "/home/tianjun/data/mnist/train-images-idx3-ubyte") +dataiter.setparam('path_label', "/home/tianjun/data/mnist/train-labels-idx1-ubyte") +dataiter.setparam('shuffle', '1') +dataiter.setparam('seed_data', '2') +dataiter.setparam('batch_size', '100') + +dataiter.init() + +dataiter.beforefirst() + +for i in range(100): + dataiter.next() + info = "Batch %d" % (i) + print info + label = dataiter.getdata() + print label.numpy diff --git a/src/c_api.cc b/src/c_api.cc index b251ba578743..02c231af9f7e 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include From 0ebf6aa570afb4a0fed215d9f2c79c6e4f44dbf6 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:28:47 +0800 Subject: [PATCH 16/61] finish old version registry in C --- Makefile | 5 ++- include/mxnet/c_api.h | 34 +++++++++++++++++ include/mxnet/io.h | 5 +++ python/mxnet/base.py | 1 + src/c_api.cc | 1 + src/io/io.cc | 4 +- src/io/iter_mnist-inl.h | 83 ++++++++++++++++++++++++----------------- src/io/iter_mnist.cc | 17 +++++++++ 8 files changed, 
111 insertions(+), 39 deletions(-) create mode 100644 src/io/iter_mnist.cc diff --git a/Makefile b/Makefile index 161b455236e4..bd9a9a0fd5e6 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o io.o +OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o io.o iter_mnist.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -106,12 +106,13 @@ convolution_gpu.o: src/operator/convolution.cu reshape_cpu.o: src/operator/reshape.cc reshape_gpu.o: src/operator/reshape.cu io.o: src/io/io.cc +iter_mnist.o: src/io/iter_mnist.cc lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a -test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a +#test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index f1119b3323c5..88a2934bbbdf 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -36,6 +36,8 @@ typedef void *SymbolHandle; typedef void *AtomicSymbolHandle; /*! \brief handle to an Executor */ typedef void *ExecutorHandle; +/*! \brief handle a dataiter creator */ +typedef void *DataIterCreator; /*! \brief handle to a DataIterator */ typedef void *DataIterHandle; /*! 
@@ -519,5 +521,37 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, */ MXNET_DLL int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out); +/*! + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array); +/*! + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOIterGetName(DataIterCreator iter, + const char **out_name); +/*! + * \brief create an iterator, init with parameters + * the array size of passed in arguments + * \param creator IOIterator Enrty + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); + #endif // MXNET_C_API_H_ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 29dccbace770..16c86138abe1 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -25,6 +25,11 @@ class IIterator : public dmlc::DataIter { * \param val value of parameter */ virtual void SetParam(const char *name, const char *val) = 0; + /*! + * \brief init the parameter + * \param kwargs key-value pairs + */ + virtual void InitParams(const std::vector >& kwargs) = 0; /*! \brief initalize the iterator so that we can use the iterator */ virtual void Init(void) = 0; /*! 
\brief set before first of the item */ diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 744ef46ba9f2..ec9d43dc58aa 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -75,6 +75,7 @@ def _load_lib(): SymbolCreatorHandle = ctypes.c_void_p SymbolHandle = ctypes.c_void_p ExecutorHandle = ctypes.c_void_p +DataIterCreatorHandle = ctypes.c_void_p DataIterHandle = ctypes.c_void_p #---------------------------- # helper function definition diff --git a/src/c_api.cc b/src/c_api.cc index 02c231af9f7e..f5f2d12ff66d 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/io/io.cc b/src/io/io.cc index 2df16e4fc209..60c013a812a5 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -23,7 +23,7 @@ namespace mxnet { if (!strcmp(name, "iter")) { if (!strcmp(val, "mnist")) { CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new MNISTIterator(); continue; + it = new io::MNISTIterator(); continue; } CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; } @@ -52,7 +52,7 @@ namespace mxnet { // Currently only support mnist if (!strcmp(iter_name, "mnist")) { CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new MNISTIterator(); + it = new io::MNISTIterator(); } CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; return it; diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 376838fcf3f0..62168f8f1811 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -10,19 +10,42 @@ #include #include #include +#include #include #include #include "../utils/random.h" namespace mxnet { +namespace io { +// Define mnist io parameters +struct MNISTParam : public dmlc::Parameter { + /*! \brief path */ + std::string path_img, path_label; + /*! \brief whether to do shuffle */ + bool shuffle; + /*! \brief whether to print info */ + bool silent; + /*! \brief batch size */ + int batch_size; + /*! 
\brief data mode */ + int input_flat; + // declare parameters in header file + DMLC_DECLARE_PARAMETER(Param) { + DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte"); + DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte"); + DMLC_DECLARE_FIELD(shuffle).set_default(false); + DMLC_DECLARE_FIELD(silent).set_default(false); + DMLC_DECLARE_FIELD(batch_size).set_default(128); + DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) + .add_enum("noflat", 0).set_default(1); + } +}; + class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; - mode_ = 1; inst_offset_ = 0; - silent_ = 0; - shuffle_ = 0; rnd.Seed(kRandMagic); out_.data.resize(2); } @@ -30,15 +53,9 @@ class MNISTIterator: public IIterator { if (img_.dptr_ != NULL) delete []img_.dptr_; } virtual void SetParam(const char *name, const char *val) { - if (!strcmp(name, "silent")) silent_ = atoi(val); - if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); - if (!strcmp(name, "input_flat")) mode_ = atoi(val); - if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); - if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "path_label")) path_label = val; - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + std::map kwargs; + kwargs[name] = val; + param.Init(kwargs); } // intialize iterator loads data in virtual void Init(void) { @@ -47,34 +64,34 @@ class MNISTIterator: public IIterator { // set name this->SetDataName(std::string("data")); this->SetDataName(std::string("label")); - if (mode_ == 1) { - batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, 1, img_.size(1) * img_.size(2)); + if (param.input_flat == 1) { + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, 1, img_.size(1) * img_.size(2)); } else { - batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, img_.size(1), img_.size(2)); + 
batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, img_.size(1), img_.size(2)); } out_.inst_index = NULL; - batch_label_.shape_ = mshadow::Shape2(batch_size_, 1); + batch_label_.shape_ = mshadow::Shape2(param.batch_size, 1); batch_label_.stride_ = 1; batch_data_.stride_ = batch_data_.size(3); - out_.batch_size = batch_size_; - if (shuffle_) this->Shuffle(); - if (silent_ == 0) { + out_.batch_size = param.batch_size; + if (param.shuffle) this->Shuffle(); + if (param.silent == 0) { mshadow::Shape<4> s = batch_data_.shape_; printf("MNISTIterator: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", - (unsigned)img_.size(0), shuffle_, s[0], s[1], s[2], s[3]); + (unsigned)img_.size(0), param.shuffle, s[0], s[1], s[2], s[3]); } } virtual void BeforeFirst(void) { this->loc_ = 0; } virtual bool Next(void) { - if (loc_ + batch_size_ <= img_.size(0)) { + if (loc_ + param.batch_size <= img_.size(0)) { batch_data_.dptr_ = img_[loc_].dptr_; batch_label_.dptr_ = &labels_[loc_]; out_.data[0] = TBlob(batch_data_); out_.data[1] = TBlob(batch_label_); out_.inst_index = &inst_[loc_]; - loc_ += batch_size_; + loc_ += param.batch_size; return true; } else { return false; @@ -83,10 +100,13 @@ class MNISTIterator: public IIterator { virtual const DataBatch &Value(void) const { return out_; } - + virtual void InitParams(const std::vector >& kwargs) { + std::map kmap(kwargs.begin(), kwargs.end()); + param.Init(kmap); + } private: inline void LoadImage(void) { - dmlc::Stream *stdimg = dmlc::Stream::Create(path_img.c_str(), "r"); + dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); ReadInt(stdimg); int image_count = ReadInt(stdimg); int image_rows = ReadInt(stdimg); @@ -111,7 +131,7 @@ class MNISTIterator: public IIterator { delete stdimg; } inline void LoadLabel(void) { - dmlc::Stream *stdlabel = dmlc::Stream::Create(path_label.c_str(), "r"); + dmlc::Stream *stdlabel = dmlc::Stream::Create(param.path_label.c_str(), "r"); ReadInt(stdlabel); int labels_count = 
ReadInt(stdlabel); labels_.resize(labels_count); @@ -146,20 +166,12 @@ class MNISTIterator: public IIterator { } private: - /*! \brief silent */ - int silent_; - /*! \brief path */ - std::string path_img, path_label; + /*! \brief MNIST iter params */ + MNISTParam param; /*! \brief output */ DataBatch out_; - /*! \brief whether do shuffle */ - int shuffle_; - /*! \brief data mode */ - int mode_; /*! \brief current location */ index_t loc_; - /*! \brief batch size */ - index_t batch_size_; /*! \brief image content */ mshadow::Tensor img_; /*! \brief label content */ @@ -177,5 +189,6 @@ class MNISTIterator: public IIterator { // magic number to setup randomness static const int kRandMagic = 0; }; // class MNISTIterator +} // namespace io } // namespace mxnet #endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc new file mode 100644 index 000000000000..942398749378 --- /dev/null +++ b/src/io/iter_mnist.cc @@ -0,0 +1,17 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file iter_mnist.cc + * \brief register mnist iterator + * \author Tianjun Xiao +*/ +#include +#include "./iter_mnist-inl.h" + +namespace mxnet { +namespace io { + +DMLC_REGISTER_PARAMETER(MNISTParam); +REGISTER_IO_ITER(mnist, MNISTIterator); + +} // namespace io +} // namespace mxnet From 7511c12d52185d174d1f44f5c2c67b833fb6afe7 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 23:21:49 +0800 Subject: [PATCH 17/61] modify to dmlc registry --- include/mxnet/io.h | 30 ++++++++++++++++++++++++++++++ python/mxnet/io.py | 25 ++++++++++++++++++++++--- src/io/io.cc | 5 +++++ src/io/iter_mnist-inl.h | 20 +++++++++++++------- src/io/iter_mnist.cc | 5 +++-- 5 files changed, 73 insertions(+), 12 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 16c86138abe1..600978023b5b 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -6,6 +6,7 @@ #ifndef MXNET_IO_H_ #define MXNET_IO_H_ #include +#include #include #include #include @@ -106,5 
+107,34 @@ IIterator *CreateIteratorFromConfig(const char* cfg_path); * \return the data IIterator ptr */ IIterator *CreateIteratorByName(const char* iter_name); + +/*! \brief typedef the factory function of data iterator */ +typedef IIterator *(*DataIteratorFactory)(); +/*! + * \brief Registry entry for DataIterator factory functions. + */ +struct DataIteratorReg + : public dmlc::FunctionRegEntryBase { +}; +//-------------------------------------------------------------- +// The following part are API Registration of Iterators +//-------------------------------------------------------------- +/*! + * \brief Macro to register Iterators + * + * \code + * // example of registering a mnist iterator + * REGISTER_IO_ITERATOR(MNIST, MNISTIterator) + * .describe("Mnist data iterator"); + * + * \endcode + */ +#define MXNET_REGISTER_IO_ITER(name, DataIteratorType) \ + static ::mxnet::IIterator* __create__ ## DataIteratorType ## __() { \ + return new DataIteratorType; \ + } \ + DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ + .set_body(__create__ ## DataIteratorType ## __) } // namespace mxnet #endif // MXNET_IO_H_ diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 96e4938a79b3..ead49f07c4fd 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -5,16 +5,35 @@ import ctypes from .base import _LIB +from .base import c_array, c_str, mx_uint, string_types from .base import DataIterHandle, NArrayHandle from .base import check_call from .narray import NArray class DataIter(object): - """DataIter object in mxnet + """DataIter object in mxnet. List all the needed functions here. 
""" - DataIter is a wrapper for C++ DataIter functions - """ + def __init__(self, handle): + """Initialize with handle + Parameters + ---------- + handle : DataIterHandle + the handle to the underlying C++ Data Iterator + """ + self.handle = handle + + def __del__(self): + check_call(_LIB.MXDataIterFree(self.handle)) + + + + + + + + + def __init__(self): """initialize a new dataiter diff --git a/src/io/io.cc b/src/io/io.cc index 60c013a812a5..aafe85073a52 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,10 @@ #include "iter_mnist-inl.h" #include "../utils/random.h" +namespace dmlc { +DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); +} // namespace dmlc + namespace mxnet { IIterator *CreateIterator( const std::vector< std::pair > &cfg) { diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 62168f8f1811..6a705c483e5b 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -30,14 +30,20 @@ struct MNISTParam : public dmlc::Parameter { /*! 
\brief data mode */ int input_flat; // declare parameters in header file - DMLC_DECLARE_PARAMETER(Param) { - DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte"); - DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte"); - DMLC_DECLARE_FIELD(shuffle).set_default(false); - DMLC_DECLARE_FIELD(silent).set_default(false); - DMLC_DECLARE_FIELD(batch_size).set_default(128); + DMLC_DECLARE_PARAMETER(MNISTParam) { + DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") + .describe("Mnist image path."); + DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") + .describe("Mnist label path."); + DMLC_DECLARE_FIELD(shuffle).set_default(false) + .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to print out data info."); + DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) + .describe("Batch Size."); DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) - .add_enum("noflat", 0).set_default(1); + .add_enum("noflat", 0).set_default(1) + .describe("Whether to flat the data into 1D."); } }; diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 942398749378..c6fab8d376d7 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,14 +4,15 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include #include "./iter_mnist-inl.h" namespace mxnet { namespace io { DMLC_REGISTER_PARAMETER(MNISTParam); -REGISTER_IO_ITER(mnist, MNISTIterator); +MXNET_REGISTER_IO_ITER(MNIST, MNISTIterator) + .describe("Create MNISTIterator") + .add_arguments(MNISTParam::__FIELDS__()); } // namespace io } // namespace mxnet From c03d62b471270f5d7c67d088d2e182bf6ce5a01e Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 03:20:11 +0800 Subject: [PATCH 18/61] pass python mnist test, begin cleaning --- include/mxnet/c_api.h | 110 +++++++++++--------------- include/mxnet/io.h | 4 +- python/mxnet/__init__.py | 1 + python/mxnet/io.py | 
166 ++++++++++++++++++++++++++++----------- python/test_io.py | 27 +++---- python/test_mnist.py | 84 ++++++-------------- src/c_api.cc | 83 ++++++++++++++++++++ src/io/iter_mnist-inl.h | 3 +- src/io/iter_mnist.cc | 2 +- 9 files changed, 293 insertions(+), 187 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 88a2934bbbdf..631e0c032852 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -454,64 +454,82 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, // Part 5: IO Interface //-------------------------------------------- /*! - * \brief create an data iterator from configs string - * \param cfg config string that contains the - * configuration about the iterator - * \param out the handle to the iterator + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateFromConfig(const char *cfg, - DataIterHandle *out); +MXNET_DLL int MXListDataIters(mx_uint *out_size, + DataIterCreator **out_array); /*! - * \brief create an data iterator by name - * \param iter_name iterator name - * \param out the handle to the iterator + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateByName(const char *iter_name, - DataIterHandle *out); +MXNET_DLL int MXDataIterGetName(DataIterCreator iter, + const char **out_name); /*! 
- * \brief set parameter value - * \param handle the handle to iterator - * \param name parameter name - * \param val parameter value + * \brief init an iterator, init with parameters + * the array size of passed in arguments + * \param handle of the iterator creator + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out resulting iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOSetParam(DataIterHandle handle, - const char *name, const char *val); +MXNET_DLL int MXDataIterCreateIter(DataIterCreator handle, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); /*! - * \brief Init after set parameter - * \param handle the handle to iterator + * \brief Get the detailed information about data iterator. + * \param creator the DataIterCreator. + * \param name The returned name of the creator. + * \param description The returned description of the symbol. + * \param num_args Number of arguments. + * \param arg_names Name of the arguments. + * \param arg_type_infos Type informations about the arguments. + * \param arg_descriptions Description information about the arguments. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOInit(DataIterHandle handle); +MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions); +/*! + * \brief free the handle to the IO module + * \param handle the handle pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! 
* \brief move iterator to next position * \param handle the handle to iterator * \param out return value of next * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIONext(DataIterHandle handle, +MXNET_DLL int MXDataIterNext(DataIterHandle handle, int *out); /*! * \brief call iterator.BeforeFirst * \param handle the handle to iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOBeforeFirst(DataIterHandle handle); -/*! - * \brief free the handle to the IO module - * \param handle the handle pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOFree(DataIterHandle handle); +MXNET_DLL int MXDataIterBeforeFirst(DataIterHandle handle); + /*! * \brief get the handle to the NArray of underlying data * \param handle the handle pointer to the data iterator * \param out handle to underlying data NArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOGetData(DataIterHandle handle, +MXNET_DLL int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out); /*! * \brief get the handle to the NArray of underlying label @@ -519,39 +537,7 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, * \param out the handle to underlying label NArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOGetLabel(DataIterHandle handle, +MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); -/*! - * \brief list all the available iterator entries - * \param out_size the size of returned iterators - * \param out_array the output iteratos entries - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array); -/*! 
- * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOIterGetName(DataIterCreator iter, - const char **out_name); -/*! - * \brief create an iterator, init with parameters - * the array size of passed in arguments - * \param creator IOIterator Enrty - * \param num_param number of parameter - * \param keys parameter keys - * \param vals parameter values - * \param out pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - const char **vals, - DataIterHandle *out); - #endif // MXNET_C_API_H_ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 600978023b5b..4ca5ed05fd18 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -27,10 +27,10 @@ class IIterator : public dmlc::DataIter { */ virtual void SetParam(const char *name, const char *val) = 0; /*! - * \brief init the parameter + * \brief set the parameters and init iter * \param kwargs key-value pairs */ - virtual void InitParams(const std::vector >& kwargs) = 0; + virtual void SetInit(const std::vector >& kwargs) = 0; /*! \brief initalize the iterator so that we can use the iterator */ virtual void Init(void) = 0; /*! \brief set before first of the item */ diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index c7720dcbd935..a8632bfa2ff8 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -12,6 +12,7 @@ from .base import MXNetError from . import narray from . import symbol +from . 
import io __version__ = "0.1.0" diff --git a/python/mxnet/io.py b/python/mxnet/io.py index ead49f07c4fd..baee99a02d61 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -4,6 +4,7 @@ from __future__ import absolute_import import ctypes +import sys from .base import _LIB from .base import c_array, c_str, mx_uint, string_types from .base import DataIterHandle, NArrayHandle @@ -25,65 +26,42 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) + + def __call__(self, *args, **kwargs): + """Invoke iterator as function on inputs. Init params. - - - - - - - - - def __init__(self): - """initialize a new dataiter - - """ - self._datahandle = None - - def createfromcfg(self, cfg_path): - """create a dataiter from config file - - cfg_path is the path of configure file - """ - hdl = DataIterHandle() - check_call(_LIB.MXIOCreateFromConfig(ctypes.c_char_p(cfg_path), ctypes.byref(hdl))) - self._datahandle = hdl - - def createbyname(self, iter_name): - """create a dataiter by the name - - iter_name can be mnist imgrec or so on - """ - hdl = DataIterHandle() - check_call(_LIB.MXIOCreateByName(ctypes.c_char_p(iter_name), ctypes.byref(hdl))) - self._datahandle = hdl - - def setparam(self, name, val): - """set param value for dataiter - - name prameter name - val parameter value - """ - check_call(_LIB.MXIOSetParam(self._datahandle, ctypes.c_char_p(name), ctypes.c_char_p(val))) - - def init(self): - """init dataiter - + Parameters + --------- + args: + provide positional arguments, should not be given. 
+ + kwargs: + provide keyword arguments + Returns + ------- + the inited iterator """ - check_call(_LIB.MXIOInit(self._datahandle)) + if len(args) != 0: + raise TypeError('data iterator only accept \ + keyword arguments') + num_args = len(kwargs) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) + vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) + check_call(_LIB.MXDataIterSetInit( \ + self.handle, num_args, keys, vals)) def beforefirst(self): """set loc to 0 """ - check_call(_LIB.MXIOBeforeFirst(self._datahandle)) + check_call(_LIB.MXDataIterBeforeFirst(self.handle)) def next(self): """init dataiter """ next_res = ctypes.c_int(0) - check_call(_LIB.MXIONext(self._datahandle, ctypes.byref(next_res))) + check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) return next_res.value def getdata(self): @@ -91,7 +69,7 @@ def getdata(self): """ hdl = NArrayHandle() - check_call(_LIB.MXIOGetData(self._datahandle, ctypes.byref(hdl))) + check_call(_LIB.MXDataIterGetData(self.handle, ctypes.byref(hdl))) return NArray(hdl) def getlabel(self): @@ -99,5 +77,97 @@ def getlabel(self): """ hdl = NArrayHandle() - check_call(_LIB.MXIOGetLabel(self._datahandle, ctypes.byref(hdl))) + check_call(_LIB.MXDataIterGetLabel(self.handle, ctypes.byref(hdl))) return NArray(hdl) + +def _make_io_iterator(handle): + """Create an io iterator by handle.""" + name = ctypes.c_char_p() + desc = ctypes.c_char_p() + num_args = mx_uint() + arg_names = ctypes.POINTER(ctypes.c_char_p)() + arg_types = ctypes.POINTER(ctypes.c_char_p)() + arg_descs = ctypes.POINTER(ctypes.c_char_p)() + + check_call(_LIB.MXDataIterGetIterInfo( \ + handle, ctypes.byref(name), ctypes.byref(desc), \ + ctypes.byref(num_args), \ + ctypes.byref(arg_names), \ + ctypes.byref(arg_types), \ + ctypes.byref(arg_descs))) + iter_name = name.value + param_str = [] + for i in range(num_args.value): + ret = '%s : %s' % (arg_names[i], arg_types[i]) + if len(arg_descs[i]) != 0: + ret += 
'\n ' + arg_descs[i] + param_str.append(ret) + + doc_str = ('%s\n\n' + + 'Parameters\n' + + '----------\n' + + '%s\n' + + 'name : string, required.\n' + + ' Name of the resulting data iterator.\n\n' + + 'Returns\n' + + '-------\n' + + 'iterator: Iterator\n'+ + ' The result iterator.') + doc_str = doc_str % (desc.value, '\n'.join(param_str)) + + def creator(*args, **kwargs): + """Create an iterator. + The parameters listed below can be passed in as keyword arguments. + + Parameters + ---------- + name : string, required. + Name of the resulting data iterator. + + Returns + ------- + symbol: Symbol + the resulting symbol + """ + param_keys = [] + param_vals = [] + symbol_kwargs = {} + name = kwargs.pop('name', None) + + for k, v in kwargs.items(): + param_keys.append(c_str(k)) + param_vals.append(c_str(str(v))) + # create atomic symbol + param_keys = c_array(ctypes.c_char_p, param_keys) + param_vals = c_array(ctypes.c_char_p, param_vals) + iter_handle = DataIterHandle() + check_call(_LIB.MXDataIterCreateIter( + handle, len(param_keys), + param_keys, param_vals, + ctypes.byref(iter_handle))) + + if len(args): + raise TypeError('%s can only accept keyword arguments' % iter_name) + + return DataIter(iter_handle) + + creator.__name__ = iter_name + creator.__doc__ = doc_str + return creator + + +def _init_io_module(): + """List and add all the data iterators to current module.""" + plist = ctypes.POINTER(ctypes.c_void_p)() + size = ctypes.c_uint() + + check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) + + module_obj = sys.modules[__name__] + for i in range(size.value): + hdl = ctypes.c_void_p(plist[i]) + dataiter = _make_io_iterator(hdl) + setattr(module_obj, dataiter.__name__, dataiter) + +# Initialize the io in startups +_init_io_module() diff --git a/python/test_io.py b/python/test_io.py index 6909176d11c2..d15d4cc32fcd 100644 --- a/python/test_io.py +++ b/python/test_io.py @@ -1,22 +1,21 @@ #pylint: skip-file import mxnet as mx +import numpy as 
np +import os -dataiter = mx.io.DataIter() -#a.createfromcfg('/home/tianjun/mxnet/mxnet/MNIST.conf') -dataiter.createbyname('mnist') -dataiter.setparam('path_img', "/home/tianjun/data/mnist/train-images-idx3-ubyte") -dataiter.setparam('path_label', "/home/tianjun/data/mnist/train-labels-idx1-ubyte") -dataiter.setparam('shuffle', '1') -dataiter.setparam('seed_data', '2') -dataiter.setparam('batch_size', '100') - -dataiter.init() +dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") dataiter.beforefirst() -for i in range(100): - dataiter.next() - info = "Batch %d" % (i) +idx = 0 +while dataiter.next(): + info = "Batch %d" % (idx) + idx += 1 print info - label = dataiter.getdata() + ''' + label = dataiter.getlabel() print label.numpy + ''' + diff --git a/python/test_mnist.py b/python/test_mnist.py index 63153cbe7f19..149d4bbcd454 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -5,52 +5,7 @@ def CalAcc(out, label): pred = np.argmax(out, axis=1) - return np.sum(pred == label) * 1.0 / out.shape[0] - - -# load data -class MNISTIter(object): - def __init__(self, which_set, batch_size=100, flatten=True): - if not os.path.exists('mnist.pkl.gz'): - os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") - f = gzip.open('mnist.pkl.gz', 'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - if which_set == 'train': - self.data = train_set[0] - self.label = np.asarray(train_set[1]) - elif which_set == 'valid': - self.data = valid_set[0] - self.label = np.asarray(valid_set[1]) - else: - self.data = test_set[0] - self.data = np.asarray(test_set[1]) - self.flatten = flatten - self.batch_size = batch_size - self.nbatch = self.data.shape[0] / batch_size - assert(self.data.shape[0] % batch_size == 0) # I am lazy - self.now_idx = -1 - def BeforeFirst(self): - self.now_idx = -1 - def 
Next(self): - self.now_idx += 1 - if self.now_idx == self.nbatch: - return False - return True - def Get(self): - if self.now_idx < 0: - raise Exception("Iterator is at head") - elif self.now_idx >= self.nbatch: - raise Exception("Iterator is at end") - start = self.now_idx * self.batch_size - end = (self.now_idx + 1) * self.batch_size - if self.flatten: - return (self.data[start:end, :], self.label[start:end]) - else: - return (self.data[start:end, :].reshape(batch_size, 1, 28, 28), - self.label[start:end]) - - + return np.sum(pred == label.transpose()) * 1.0 / out.shape[0] # symbol net batch_size = 100 @@ -100,20 +55,29 @@ def Update(mom, grad, weight): block = zip(mom_narrays, grad_narrays, arg_narrays) -train = MNISTIter("train", batch_size, False) -valid = MNISTIter("valid", batch_size, False) +train_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") +train_dataiter.beforefirst() +val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") +val_dataiter.beforefirst() for i in xrange(epoch): # train print "Epoch %d" % i train_acc = 0.0 val_acc = 0.0 - while train.Next(): - data, label = train.Get() - inputs["data"].numpy[:] = data - inputs["sm_label"].numpy[:] = label + train_nbatch = 0 + val_nbatch = 0 + while train_dataiter.next(): + data = train_dataiter.getdata() + label = train_dataiter.getlabel().numpy.astype(np.int32) + inputs["data"].numpy[:] = data.numpy executor.forward() train_acc += CalAcc(out_narray.numpy, label) + train_nbatch += 1 grad_narray.numpy[:] = out_narray.numpy executor.backward([grad_narray]) @@ -121,15 +85,17 @@ def Update(mom, grad, weight): Update(mom, grad, weight) # evaluate - while valid.Next(): - data, label = 
valid.Get() - inputs["data"].numpy[:] = data + while val_dataiter.next(): + data = val_dataiter.getdata() + label = val_dataiter.getlabel().numpy.astype(np.int32) + inputs["data"].numpy[:] = data.numpy executor.forward() val_acc += CalAcc(out_narray.numpy, label) - print "Train Acc: ", train_acc / train.nbatch - print "Valid Acc: ", val_acc / valid.nbatch - train.BeforeFirst() - valid.BeforeFirst() + val_nbatch += 1 + print "Train Acc: ", train_acc / train_nbatch + print "Valid Acc: ", val_acc / val_nbatch + train_dataiter.beforefirst() + val_dataiter.beforefirst() diff --git a/src/c_api.cc b/src/c_api.cc index f5f2d12ff66d..849c55ebde82 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -611,3 +611,86 @@ int MXExecutorBind(SymbolHandle symbol_handle, *out = Executor::Bind(*symb, ctx, in_args_vec, arg_grad_vec, grad_req_vec); API_END(); } + +//-------------------------------------------- +// Part 5: IO Interface +//-------------------------------------------- +int MXListDataIters(mx_uint *out_size, + DataIterCreator **out_array) { + API_BEGIN(); + auto &vec = dmlc::Registry::List(); + *out_size = static_cast(vec.size()); + *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) + API_END(); +} + +int MXDataIterGetName(DataIterCreator iter, + const char **out_name) { + API_BEGIN(); + auto *f = static_cast(iter); + *out_name = f->name.c_str(); + API_END(); +} + +int MXDataIterGetIterInfo(DataIterCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions) { + DataIteratorReg *e = static_cast(creator); + return MXAPIGetFunctionRegInfo(e, name, description, num_args, + arg_names, arg_type_infos, arg_descriptions); +} + +int MXDataIterCreateIter(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out) { + IIterator *iter = nullptr; + API_BEGIN(); + DataIteratorReg *e = static_cast(creator); + 
iter = e->body(); + std::vector > kwargs; + for (int i = 0; i < num_param; ++i) { + kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); + } + iter->SetInit(kwargs); + *out = iter; + API_END_HANDLE_ERROR(delete iter); +} + +int MXDataIterFree(DataIterHandle handle) { + API_BEGIN(); + delete static_cast *>(handle); + API_END(); +} + +int MXDataIterBeforeFirst(DataIterHandle handle) { + API_BEGIN(); + static_cast* >(handle)->BeforeFirst(); + API_END(); +} + +int MXDataIterNext(DataIterHandle handle, int *out) { + API_BEGIN(); + *out = static_cast* >(handle)->Next(); + API_END(); +} + +int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out) { + API_BEGIN(); + DataBatch db = static_cast* >(handle)->Value(); + *out = new NArray(db.data[1], 0); + API_END(); +} + +int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out) { + API_BEGIN(); + DataBatch db = static_cast* >(handle)->Value(); + *out = new NArray(db.data[0], 0); + API_END(); +} + diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 6a705c483e5b..88a3e4d82acd 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -106,9 +106,10 @@ class MNISTIterator: public IIterator { virtual const DataBatch &Value(void) const { return out_; } - virtual void InitParams(const std::vector >& kwargs) { + virtual void SetInit(const std::vector >& kwargs) { std::map kmap(kwargs.begin(), kwargs.end()); param.Init(kmap); + this->Init(); } private: inline void LoadImage(void) { diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index c6fab8d376d7..d6119d6c8a69 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -10,7 +10,7 @@ namespace mxnet { namespace io { DMLC_REGISTER_PARAMETER(MNISTParam); -MXNET_REGISTER_IO_ITER(MNIST, MNISTIterator) +MXNET_REGISTER_IO_ITER(MNISTIterator, MNISTIterator) .describe("Create MNISTIterator") .add_arguments(MNISTParam::__FIELDS__()); From 08dcaf7dbbcc452c6209f99f4e3534c7d14ef74d Mon Sep 17 00:00:00 2001 From: sneakerkg Date: 
Mon, 24 Aug 2015 04:07:14 +0800 Subject: [PATCH 19/61] clean io interface --- include/mxnet/io.h | 25 --------------------- python/mxnet/io.py | 14 +++++------- python/test_mnist.py | 4 +--- src/common/utils.h | 6 ++++++ src/io/io.cc | 48 ----------------------------------------- src/io/iter_mnist-inl.h | 25 +++++++++++---------- 6 files changed, 24 insertions(+), 98 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 4ca5ed05fd18..ac22919745a1 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -20,12 +20,6 @@ namespace mxnet { template class IIterator : public dmlc::DataIter { public: - /*! - * \brief set the parameter - * \param name name of parameter - * \param val value of parameter - */ - virtual void SetParam(const char *name, const char *val) = 0; /*! * \brief set the parameters and init iter * \param kwargs key-value pairs @@ -89,25 +83,6 @@ struct DataBatch { void Naming(std::vector names); }; // struct DataBatch -/*! - * \brief create the databatch iterator IIterator - * \param cfg configure settings key=vale pair - * \return the data IIterator ptr - */ -IIterator *CreateIterator(const std::vector > &cfg); -/*! - * \brief create the databatch iterator IIterator from config file - * \param cfg_path configure file path - * \return the data IIterator ptr - */ -IIterator *CreateIteratorFromConfig(const char* cfg_path); -/*! - * \brief create the databatch iterator IIterator by iter name - * \param iter_name can be mnist, imgrec and so on - * \return the data IIterator ptr - */ -IIterator *CreateIteratorByName(const char* iter_name); - /*! \brief typedef the factory function of data iterator */ typedef IIterator *(*DataIteratorFactory)(); /*! 
diff --git a/python/mxnet/io.py b/python/mxnet/io.py index baee99a02d61..dba36bd2114c 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -6,7 +6,7 @@ import ctypes import sys from .base import _LIB -from .base import c_array, c_str, mx_uint, string_types +from .base import c_array, c_str, mx_uint from .base import DataIterHandle, NArrayHandle from .base import check_call from .narray import NArray @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. @@ -43,9 +43,9 @@ def __call__(self, *args, **kwargs): """ if len(args) != 0: raise TypeError('data iterator only accept \ - keyword arguments') + keyword arguments') num_args = len(kwargs) - keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) check_call(_LIB.MXDataIterSetInit( \ self.handle, num_args, keys, vals)) @@ -131,8 +131,6 @@ def creator(*args, **kwargs): """ param_keys = [] param_vals = [] - symbol_kwargs = {} - name = kwargs.pop('name', None) for k, v in kwargs.items(): param_keys.append(c_str(k)) @@ -160,9 +158,7 @@ def _init_io_module(): """List and add all the data iterators to current module.""" plist = ctypes.POINTER(ctypes.c_void_p)() size = ctypes.c_uint() - - check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) - + check_call(_LIB.MXListDataIters(ctypes.byref(size), ctypes.byref(plist))) module_obj = sys.modules[__name__] for i in range(size.value): hdl = ctypes.c_void_p(plist[i]) diff --git a/python/test_mnist.py b/python/test_mnist.py index 149d4bbcd454..af4fa418e8b1 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -57,7 +57,7 @@ def Update(mom, grad, weight): train_dataiter = 
mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat") + batch_size=100, shuffle=1, silent=1, input_flat="flat", seed_data=1) train_dataiter.beforefirst() val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", @@ -97,5 +97,3 @@ def Update(mom, grad, weight): train_dataiter.beforefirst() val_dataiter.beforefirst() - - diff --git a/src/common/utils.h b/src/common/utils.h index f55ebc26535f..cf1fd2f1bb36 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -10,12 +10,18 @@ #include #include #include +#include #endif // DMLC_USE_CXX11 namespace common { #if DMLC_USE_CXX11 +/*! + * \brief Random Engine + */ +typedef std::mt19937 RANDOM_ENGINE; + /*! * \brief Helper functions. */ diff --git a/src/io/io.cc b/src/io/io.cc index aafe85073a52..fb7a8c2d3092 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -11,55 +11,7 @@ #include #include #include "iter_mnist-inl.h" -#include "../utils/random.h" namespace dmlc { DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); } // namespace dmlc - -namespace mxnet { - IIterator *CreateIterator( - const std::vector< std::pair > &cfg) { - size_t i = 0; - IIterator *it = NULL; - for (; i < cfg.size(); ++i) { - const char *name = cfg[i].first.c_str(); - const char *val = cfg[i].second.c_str(); - if (!strcmp(name, "iter")) { - if (!strcmp(val, "mnist")) { - CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new io::MNISTIterator(); continue; - } - CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; - } - if (it != NULL) { - it->SetParam(name, val); - } - } - CHECK(it != NULL) << "must specify iterator by iter=itername"; - return it; - } - - IIterator *CreateIteratorFromConfig(const char* cfg_path) { - std::ifstream ifs(cfg_path, std::ifstream::in); - 
std::vector< std::pair< std::string, std::string> > itcfg; - dmlc::Config cfg(ifs); - for (dmlc::Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { - dmlc::Config::ConfigEntry ent = *iter; - itcfg.push_back(std::make_pair(ent.first, ent.second)); - } - // Get the data and init - return CreateIterator(itcfg); - } - - IIterator *CreateIteratorByName(const char* iter_name) { - IIterator *it = NULL; - // Currently only support mnist - if (!strcmp(iter_name, "mnist")) { - CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new io::MNISTIterator(); - } - CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; - return it; - } -} // namespace mxnet diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 88a3e4d82acd..ef2348488396 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -13,7 +13,9 @@ #include #include #include -#include "../utils/random.h" +#include +#include +#include "../common/utils.h" namespace mxnet { namespace io { @@ -29,6 +31,8 @@ struct MNISTParam : public dmlc::Parameter { int batch_size; /*! \brief data mode */ int input_flat; + /*! 
\brief random seed */ + int seed_data; // declare parameters in header file DMLC_DECLARE_PARAMETER(MNISTParam) { DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") @@ -36,33 +40,29 @@ struct MNISTParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") .describe("Mnist label path."); DMLC_DECLARE_FIELD(shuffle).set_default(false) - .describe("Whether to shuffle data."); + .describe("Whether to shuffle data."); DMLC_DECLARE_FIELD(silent).set_default(false) - .describe("Whether to print out data info."); + .describe("Whether to print out data info."); DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) .describe("Batch Size."); DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) .add_enum("noflat", 0).set_default(1) .describe("Whether to flat the data into 1D."); + DMLC_DECLARE_FIELD(seed_data).set_default(0) + .describe("Random Seed."); } }; - + class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; inst_offset_ = 0; - rnd.Seed(kRandMagic); out_.data.resize(2); } virtual ~MNISTIterator(void) { if (img_.dptr_ != NULL) delete []img_.dptr_; } - virtual void SetParam(const char *name, const char *val) { - std::map kwargs; - kwargs[name] = val; - param.Init(kwargs); - } // intialize iterator loads data in virtual void Init(void) { this->LoadImage(); @@ -111,6 +111,7 @@ class MNISTIterator: public IIterator { param.Init(kmap); this->Init(); } + private: inline void LoadImage(void) { dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); @@ -151,7 +152,7 @@ class MNISTIterator: public IIterator { delete stdlabel; } inline void Shuffle(void) { - rnd.Shuffle(&inst_); + std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param.seed_data)); std::vector tmplabel(labels_.size()); mshadow::TensorContainer tmpimg(img_.shape_); for (size_t i = 0; i < inst_.size(); ++i) { @@ -191,8 +192,6 @@ class MNISTIterator: public 
IIterator { unsigned inst_offset_; /*! \brief instance index */ std::vector inst_; - // random sampler - utils::RandomSampler rnd; // magic number to setup randomness static const int kRandMagic = 0; }; // class MNISTIterator From bcd2652fb31baccbaedc09ce27fbbb730f9453d2 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:40:01 +0800 Subject: [PATCH 20/61] finish merge remote master --- Makefile | 8 ++++++++ src/io/iter_mnist-inl.h | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/Makefile b/Makefile index bd9a9a0fd5e6..a7d2da815fec 100644 --- a/Makefile +++ b/Makefile @@ -101,18 +101,26 @@ pooling_cpu.o: src/operator/pooling.cc pooling_gpu.o: src/operator/pooling.cu softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu +<<<<<<< HEAD convolution_cpu.o: src/operator/convolution.cc convolution_gpu.o: src/operator/convolution.cu reshape_cpu.o: src/operator/reshape.cc reshape_gpu.o: src/operator/reshape.cu io.o: src/io/io.cc iter_mnist.o: src/io/iter_mnist.cc +======= +io.o: src/io/io.cc +>>>>>>> finish merge remote master lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a +<<<<<<< HEAD #test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a +======= +test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a +>>>>>>> finish merge remote master #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index ef2348488396..ca88b4762c74 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -57,12 +57,27 @@ class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; + mode_ = 1; inst_offset_ = 0; + silent_ = 0; + shuffle_ = 0; + rnd.Seed(kRandMagic); out_.data.resize(2); } virtual ~MNISTIterator(void) { if (img_.dptr_ != NULL) 
delete []img_.dptr_; } + virtual void SetParam(const char *name, const char *val) { + if (!strcmp(name, "silent")) silent_ = atoi(val); + if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); + if (!strcmp(name, "input_flat")) mode_ = atoi(val); + if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); + if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "path_label")) path_label = val; + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + } // intialize iterator loads data in virtual void Init(void) { this->LoadImage(); From 97a88cb93f3e17c5b585e30726ebbbc88e48935b Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sat, 22 Aug 2015 13:05:57 +0800 Subject: [PATCH 21/61] built in python, start polishing new feature required --- include/mxnet/c_api.h | 23 +++++++++++++++++++++++ python/test_io.py | 1 - 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 631e0c032852..0ff3adf54d08 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -508,6 +508,29 @@ MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXDataIterFree(DataIterHandle handle); +/*! + * \brief create an data iterator by name + * \param iter_name iterator name + * \param out the handle to the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOCreateByName(const char *iter_name, + DataIterHandle *out); +/*! + * \brief set parameter value + * \param handle the handle to iterator + * \param name parameter name + * \param val parameter value + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOSetParam(DataIterHandle handle, + const char *name, const char *val); +/*! 
+ * \brief Init after set parameter + * \param handle the handle to iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOInit(DataIterHandle handle); /*! * \brief move iterator to next position * \param handle the handle to iterator diff --git a/python/test_io.py b/python/test_io.py index d15d4cc32fcd..940a4baa5c0d 100644 --- a/python/test_io.py +++ b/python/test_io.py @@ -18,4 +18,3 @@ label = dataiter.getlabel() print label.numpy ''' - From 69152997b9367c1e82c8b54ca6ee0d4c8751e0fd Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:28:47 +0800 Subject: [PATCH 22/61] finish old version registry in C --- Makefile | 9 --------- include/mxnet/c_api.h | 32 ++++++++++++++++++++++++++++++++ src/io/iter_mnist-inl.h | 15 +++------------ src/io/iter_mnist.cc | 2 +- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index a7d2da815fec..fd4411640b40 100644 --- a/Makefile +++ b/Makefile @@ -101,26 +101,17 @@ pooling_cpu.o: src/operator/pooling.cc pooling_gpu.o: src/operator/pooling.cu softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu -<<<<<<< HEAD convolution_cpu.o: src/operator/convolution.cc convolution_gpu.o: src/operator/convolution.cu reshape_cpu.o: src/operator/reshape.cc reshape_gpu.o: src/operator/reshape.cu io.o: src/io/io.cc iter_mnist.o: src/io/iter_mnist.cc -======= -io.o: src/io/io.cc ->>>>>>> finish merge remote master lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a -<<<<<<< HEAD -#test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a -======= -test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a ->>>>>>> finish merge remote master #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 
0ff3adf54d08..5bf0aeba584e 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -562,5 +562,37 @@ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, */ MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); +/*! + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array); +/*! + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOIterGetName(DataIterCreator iter, + const char **out_name); +/*! + * \brief create an iterator, init with parameters + * the array size of passed in arguments + * \param creator IOIterator Enrty + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); + #endif // MXNET_C_API_H_ diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index ca88b4762c74..9bc40b06d270 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -57,10 +57,7 @@ class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; - mode_ = 1; inst_offset_ = 0; - silent_ = 0; - shuffle_ = 0; rnd.Seed(kRandMagic); out_.data.resize(2); } @@ -68,15 +65,9 @@ class MNISTIterator: public IIterator { if (img_.dptr_ != NULL) delete []img_.dptr_; } virtual void SetParam(const char *name, const char *val) { - if (!strcmp(name, "silent")) silent_ = atoi(val); - if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); - if (!strcmp(name, 
"input_flat")) mode_ = atoi(val); - if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); - if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "path_label")) path_label = val; - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + std::map kwargs; + kwargs[name] = val; + param.Init(kwargs); } // intialize iterator loads data in virtual void Init(void) { diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index d6119d6c8a69..f3906ca410e7 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,6 +4,7 @@ * \brief register mnist iterator * \author Tianjun Xiao */ +#include #include "./iter_mnist-inl.h" namespace mxnet { @@ -13,6 +14,5 @@ DMLC_REGISTER_PARAMETER(MNISTParam); MXNET_REGISTER_IO_ITER(MNISTIterator, MNISTIterator) .describe("Create MNISTIterator") .add_arguments(MNISTParam::__FIELDS__()); - } // namespace io } // namespace mxnet From 305d2af5d1a430cc659316a114eefdca88e8e334 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 23:21:49 +0800 Subject: [PATCH 23/61] modify to dmlc registry --- src/io/iter_mnist.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index f3906ca410e7..3ddda17a10af 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,7 +4,6 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include #include "./iter_mnist-inl.h" namespace mxnet { From 04f9888f7229034bcaf1af46246f2f218d6b1706 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 03:20:11 +0800 Subject: [PATCH 24/61] pass python mnist test, begin cleaning --- include/mxnet/c_api.h | 84 ++++++++++++++++++++----------------------- python/mxnet/io.py | 19 +++++++++- python/test_mnist.py | 13 ++++--- src/c_api.cc | 1 - 4 files changed, 64 insertions(+), 53 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 
5bf0aeba584e..9347b62db4cb 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -509,28 +509,52 @@ MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, */ MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! - * \brief create an data iterator by name - * \param iter_name iterator name - * \param out the handle to the iterator + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateByName(const char *iter_name, - DataIterHandle *out); +MXNET_DLL int MXDataIterGetName(DataIterCreator iter, + const char **out_name); /*! - * \brief set parameter value - * \param handle the handle to iterator - * \param name parameter name - * \param val parameter value + * \brief init an iterator, init with parameters + * the array size of passed in arguments + * \param handle of the iterator creator + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out resulting iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOSetParam(DataIterHandle handle, - const char *name, const char *val); +MXNET_DLL int MXDataIterCreateIter(DataIterCreator handle, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); /*! - * \brief Init after set parameter - * \param handle the handle to iterator + * \brief Get the detailed information about data iterator. + * \param creator the DataIterCreator. + * \param name The returned name of the creator. + * \param description The returned description of the symbol. + * \param num_args Number of arguments. + * \param arg_names Name of the arguments. + * \param arg_type_infos Type informations about the arguments. + * \param arg_descriptions Description information about the arguments. 
+ * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions); +/*! + * \brief free the handle to the IO module + * \param handle the handle pointer to the data iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOInit(DataIterHandle handle); +MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! * \brief move iterator to next position * \param handle the handle to iterator @@ -562,37 +586,5 @@ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, */ MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); -/*! - * \brief list all the available iterator entries - * \param out_size the size of returned iterators - * \param out_array the output iteratos entries - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array); -/*! - * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOIterGetName(DataIterCreator iter, - const char **out_name); -/*! 
- * \brief create an iterator, init with parameters - * the array size of passed in arguments - * \param creator IOIterator Enrty - * \param num_param number of parameter - * \param keys parameter keys - * \param vals parameter values - * \param out pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - const char **vals, - DataIterHandle *out); - #endif // MXNET_C_API_H_ diff --git a/python/mxnet/io.py b/python/mxnet/io.py index dba36bd2114c..150d89ad3924 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. @@ -43,9 +43,15 @@ def __call__(self, *args, **kwargs): """ if len(args) != 0: raise TypeError('data iterator only accept \ +<<<<<<< HEAD keyword arguments') num_args = len(kwargs) keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) +======= + keyword arguments') + num_args = len(kwargs) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) +>>>>>>> pass python mnist test, begin cleaning vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) check_call(_LIB.MXDataIterSetInit( \ self.handle, num_args, keys, vals)) @@ -131,6 +137,11 @@ def creator(*args, **kwargs): """ param_keys = [] param_vals = [] +<<<<<<< HEAD +======= + symbol_kwargs = {} + name = kwargs.pop('name', None) +>>>>>>> pass python mnist test, begin cleaning for k, v in kwargs.items(): param_keys.append(c_str(k)) @@ -158,7 +169,13 @@ def _init_io_module(): """List and add all the data iterators to current module.""" plist = ctypes.POINTER(ctypes.c_void_p)() size = ctypes.c_uint() +<<<<<<< HEAD check_call(_LIB.MXListDataIters(ctypes.byref(size), ctypes.byref(plist))) +======= + + 
check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) + +>>>>>>> pass python mnist test, begin cleaning module_obj = sys.modules[__name__] for i in range(size.value): hdl = ctypes.c_void_p(plist[i]) diff --git a/python/test_mnist.py b/python/test_mnist.py index af4fa418e8b1..4dd224fd53dc 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -7,6 +7,12 @@ def CalAcc(out, label): pred = np.argmax(out, axis=1) return np.sum(pred == label.transpose()) * 1.0 / out.shape[0] +def SetGradient(out_grad, label): + assert(out_grad.shape[0] == label.shape[0]) + for i in xrange(label.shape[0]): + k = label[i] + out_grad[i][k] -= 1.0 + # symbol net batch_size = 100 data = mx.symbol.Variable('data') @@ -18,10 +24,8 @@ def CalAcc(out, label): softmax = mx.symbol.Softmax(data = fc2, name = 'sm') args_list = softmax.list_arguments() # infer shape -#data_shape = (batch_size, 784) - -data_shape = (batch_size, 1, 28, 28) -arg_shapes, out_shapes = softmax.infer_shape(data=data_shape) +data_shape = (batch_size, 1, 1, 784) +arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] mom_narrays = [mx.narray.create(shape) for shape in arg_shapes] @@ -96,4 +100,3 @@ def Update(mom, grad, weight): print "Valid Acc: ", val_acc / val_nbatch train_dataiter.beforefirst() val_dataiter.beforefirst() - diff --git a/src/c_api.cc b/src/c_api.cc index 849c55ebde82..5965b6a4fab2 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -693,4 +693,3 @@ int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out) { *out = new NArray(db.data[0], 0); API_END(); } - From c6ae534e9b36a60b66bb67ef117c55cf7c12bf5e Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 04:07:14 +0800 Subject: [PATCH 25/61] clean io interface --- python/mxnet/io.py | 19 +------------------ src/io/iter_mnist-inl.h | 6 ------ 2 files changed, 1 insertion(+), 24 deletions(-) 
diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 150d89ad3924..19d77b3eea08 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. @@ -43,15 +43,9 @@ def __call__(self, *args, **kwargs): """ if len(args) != 0: raise TypeError('data iterator only accept \ -<<<<<<< HEAD - keyword arguments') - num_args = len(kwargs) - keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) -======= keyword arguments') num_args = len(kwargs) keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) ->>>>>>> pass python mnist test, begin cleaning vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) check_call(_LIB.MXDataIterSetInit( \ self.handle, num_args, keys, vals)) @@ -137,11 +131,6 @@ def creator(*args, **kwargs): """ param_keys = [] param_vals = [] -<<<<<<< HEAD -======= - symbol_kwargs = {} - name = kwargs.pop('name', None) ->>>>>>> pass python mnist test, begin cleaning for k, v in kwargs.items(): param_keys.append(c_str(k)) @@ -169,13 +158,7 @@ def _init_io_module(): """List and add all the data iterators to current module.""" plist = ctypes.POINTER(ctypes.c_void_p)() size = ctypes.c_uint() -<<<<<<< HEAD check_call(_LIB.MXListDataIters(ctypes.byref(size), ctypes.byref(plist))) -======= - - check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) - ->>>>>>> pass python mnist test, begin cleaning module_obj = sys.modules[__name__] for i in range(size.value): hdl = ctypes.c_void_p(plist[i]) diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 9bc40b06d270..ef2348488396 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -58,17 +58,11 @@ class MNISTIterator: public IIterator { MNISTIterator(void) { img_.dptr_ = NULL; inst_offset_ = 0; - rnd.Seed(kRandMagic); 
out_.data.resize(2); } virtual ~MNISTIterator(void) { if (img_.dptr_ != NULL) delete []img_.dptr_; } - virtual void SetParam(const char *name, const char *val) { - std::map kwargs; - kwargs[name] = val; - param.Init(kwargs); - } // intialize iterator loads data in virtual void Init(void) { this->LoadImage(); From 3da4ac0d520d41a947b3303e467293604c4940bd Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 04:17:01 +0800 Subject: [PATCH 26/61] modify to pass travis --- python/mxnet/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 19d77b3eea08..35fe97bdeea8 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -132,9 +132,9 @@ def creator(*args, **kwargs): param_keys = [] param_vals = [] - for k, v in kwargs.items(): + for k, val in kwargs.items(): param_keys.append(c_str(k)) - param_vals.append(c_str(str(v))) + param_vals.append(c_str(str(val))) # create atomic symbol param_keys = c_array(ctypes.c_char_p, param_keys) param_vals = c_array(ctypes.c_char_p, param_vals) From f4fd400f9fb4ac6eb4b0fd28a6d0296c703366df Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Wed, 26 Aug 2015 11:45:53 +0800 Subject: [PATCH 27/61] finish refactoring io code --- include/mxnet/c_api.h | 10 +- include/mxnet/io.h | 6 +- python/mxnet/io.py | 40 ++++---- python/test_mnist.py | 35 ++++--- src/c_api.cc | 11 +-- src/io/iter_mnist-inl.h | 200 ---------------------------------------- src/io/iter_mnist.cc | 188 ++++++++++++++++++++++++++++++++++++- 7 files changed, 231 insertions(+), 259 deletions(-) delete mode 100644 src/io/iter_mnist-inl.h diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 9347b62db4cb..186806213c6d 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -461,14 +461,6 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, */ MXNET_DLL int MXListDataIters(mx_uint *out_size, DataIterCreator **out_array); -/*! 
- * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXDataIterGetName(DataIterCreator iter, - const char **out_name); /*! * \brief init an iterator, init with parameters * the array size of passed in arguments @@ -564,7 +556,7 @@ MXNET_DLL int MXDataIterFree(DataIterHandle handle); MXNET_DLL int MXDataIterNext(DataIterHandle handle, int *out); /*! - * \brief call iterator.BeforeFirst + * \brief call iterator.Reset * \param handle the handle to iterator * \return 0 when success, -1 when failure happens */ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index ac22919745a1..47a59eec54fe 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -24,10 +24,8 @@ class IIterator : public dmlc::DataIter { * \brief set the parameters and init iter * \param kwargs key-value pairs */ - virtual void SetInit(const std::vector >& kwargs) = 0; - /*! \brief initalize the iterator so that we can use the iterator */ - virtual void Init(void) = 0; - /*! \brief set before first of the item */ + virtual void Init(const std::vector >& kwargs) = 0; + /*! \brief reset the iterator */ virtual void BeforeFirst(void) = 0; /*! \brief move to next item */ virtual bool Next(void) = 0; diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 35fe97bdeea8..5b6577c13369 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -27,19 +27,9 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - def __call__(self, *args, **kwargs): - """Invoke iterator as function on inputs. Init params. + def __iter__(self): + """make the class iterable - Parameters - --------- - args: - provide positional arguments, should not be given. 
- - kwargs: - provide keyword arguments - Returns - ------- - the inited iterator """ if len(args) != 0: raise TypeError('data iterator only accept \ @@ -57,13 +47,31 @@ def beforefirst(self): check_call(_LIB.MXDataIterBeforeFirst(self.handle)) def next(self): - """init dataiter + """get next data from iterator + + Returns + ------- + labels and images for the next batch + """ + next_res = ctypes.c_int(0) + check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) + if next_res.value: + return self.getdata(), self.getlabel() + else: + self.reset() + raise StopIteration + + def iter_next(self): + """iterate to next data with return value + Returns + ------- + return true if success """ next_res = ctypes.c_int(0) check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) return next_res.value - + def getdata(self): """get data from batch @@ -126,8 +134,8 @@ def creator(*args, **kwargs): Returns ------- - symbol: Symbol - the resulting symbol + dataiter: Dataiter + the resulting data iterator """ param_keys = [] param_vals = [] diff --git a/python/test_mnist.py b/python/test_mnist.py index 4dd224fd53dc..b4e6db67465d 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -59,14 +59,14 @@ def Update(mom, grad, weight): block = zip(mom_narrays, grad_narrays, arg_narrays) -train_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", - path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat", seed_data=1) -train_dataiter.beforefirst() -val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", - path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat") -val_dataiter.beforefirst() +train_dataiter = mx.io.MNISTIter( + image="/home/tianjun/data/mnist/train-images-idx3-ubyte", + label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, 
shuffle=1, silent=0, flat=1, seed=1) +val_dataiter = mx.io.MNISTIter( + image="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", + label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=0, flat=1) for i in xrange(epoch): # train @@ -75,10 +75,11 @@ def Update(mom, grad, weight): val_acc = 0.0 train_nbatch = 0 val_nbatch = 0 - while train_dataiter.next(): - data = train_dataiter.getdata() - label = train_dataiter.getlabel().numpy.astype(np.int32) - inputs["data"].numpy[:] = data.numpy + + for data, label in train_dataiter: + data = data.numpy + label = label.numpy.astype(np.int32) + inputs["data"].numpy[:] = data executor.forward() train_acc += CalAcc(out_narray.numpy, label) train_nbatch += 1 @@ -89,14 +90,12 @@ def Update(mom, grad, weight): Update(mom, grad, weight) # evaluate - while val_dataiter.next(): - data = val_dataiter.getdata() - label = val_dataiter.getlabel().numpy.astype(np.int32) - inputs["data"].numpy[:] = data.numpy + for data, label in val_dataiter: + data = data.numpy + label = label.numpy.astype(np.int32) + inputs["data"].numpy[:] = data executor.forward() val_acc += CalAcc(out_narray.numpy, label) val_nbatch += 1 print "Train Acc: ", train_acc / train_nbatch print "Valid Acc: ", val_acc / val_nbatch - train_dataiter.beforefirst() - val_dataiter.beforefirst() diff --git a/src/c_api.cc b/src/c_api.cc index 5965b6a4fab2..4d7381fde0f1 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -624,14 +624,6 @@ int MXListDataIters(mx_uint *out_size, API_END(); } -int MXDataIterGetName(DataIterCreator iter, - const char **out_name) { - API_BEGIN(); - auto *f = static_cast(iter); - *out_name = f->name.c_str(); - API_END(); -} - int MXDataIterGetIterInfo(DataIterCreator creator, const char **name, const char **description, @@ -657,7 +649,8 @@ int MXDataIterCreateIter(DataIterCreator creator, for (int i = 0; i < num_param; ++i) { kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); } - iter->SetInit(kwargs); + 
iter->Init(kwargs); + iter->BeforeFirst(); *out = iter; API_END_HANDLE_ERROR(delete iter); } diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h deleted file mode 100644 index ef2348488396..000000000000 --- a/src/io/iter_mnist-inl.h +++ /dev/null @@ -1,200 +0,0 @@ -/*! - * Copyright (c) 2015 by Contributors - * \file iter_mnist-inl.h - * \brief iterator that takes mnist dataset - */ -#ifndef MXNET_IO_ITER_MNIST_INL_H_ -#define MXNET_IO_ITER_MNIST_INL_H_ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../common/utils.h" - -namespace mxnet { -namespace io { -// Define mnist io parameters -struct MNISTParam : public dmlc::Parameter { - /*! \brief path */ - std::string path_img, path_label; - /*! \brief whether to do shuffle */ - bool shuffle; - /*! \brief whether to print info */ - bool silent; - /*! \brief batch size */ - int batch_size; - /*! \brief data mode */ - int input_flat; - /*! \brief random seed */ - int seed_data; - // declare parameters in header file - DMLC_DECLARE_PARAMETER(MNISTParam) { - DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") - .describe("Mnist image path."); - DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") - .describe("Mnist label path."); - DMLC_DECLARE_FIELD(shuffle).set_default(false) - .describe("Whether to shuffle data."); - DMLC_DECLARE_FIELD(silent).set_default(false) - .describe("Whether to print out data info."); - DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) - .describe("Batch Size."); - DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) - .add_enum("noflat", 0).set_default(1) - .describe("Whether to flat the data into 1D."); - DMLC_DECLARE_FIELD(seed_data).set_default(0) - .describe("Random Seed."); - } -}; - -class MNISTIterator: public IIterator { - public: - MNISTIterator(void) { - img_.dptr_ = NULL; - inst_offset_ = 0; - out_.data.resize(2); - } - virtual ~MNISTIterator(void) { - if 
(img_.dptr_ != NULL) delete []img_.dptr_; - } - // intialize iterator loads data in - virtual void Init(void) { - this->LoadImage(); - this->LoadLabel(); - // set name - this->SetDataName(std::string("data")); - this->SetDataName(std::string("label")); - if (param.input_flat == 1) { - batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, 1, img_.size(1) * img_.size(2)); - } else { - batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, img_.size(1), img_.size(2)); - } - out_.inst_index = NULL; - batch_label_.shape_ = mshadow::Shape2(param.batch_size, 1); - batch_label_.stride_ = 1; - batch_data_.stride_ = batch_data_.size(3); - out_.batch_size = param.batch_size; - if (param.shuffle) this->Shuffle(); - if (param.silent == 0) { - mshadow::Shape<4> s = batch_data_.shape_; - printf("MNISTIterator: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", - (unsigned)img_.size(0), param.shuffle, s[0], s[1], s[2], s[3]); - } - } - virtual void BeforeFirst(void) { - this->loc_ = 0; - } - virtual bool Next(void) { - if (loc_ + param.batch_size <= img_.size(0)) { - batch_data_.dptr_ = img_[loc_].dptr_; - batch_label_.dptr_ = &labels_[loc_]; - out_.data[0] = TBlob(batch_data_); - out_.data[1] = TBlob(batch_label_); - out_.inst_index = &inst_[loc_]; - loc_ += param.batch_size; - return true; - } else { - return false; - } - } - virtual const DataBatch &Value(void) const { - return out_; - } - virtual void SetInit(const std::vector >& kwargs) { - std::map kmap(kwargs.begin(), kwargs.end()); - param.Init(kmap); - this->Init(); - } - - private: - inline void LoadImage(void) { - dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); - ReadInt(stdimg); - int image_count = ReadInt(stdimg); - int image_rows = ReadInt(stdimg); - int image_cols = ReadInt(stdimg); - - img_.shape_ = mshadow::Shape3(image_count, image_rows, image_cols); - img_.stride_ = img_.size(2); - - // allocate continuous memory - img_.dptr_ = new float[img_.MSize()]; - for (int i = 0; i < 
image_count; ++i) { - for (int j = 0; j < image_rows; ++j) { - for (int k = 0; k < image_cols; ++k) { - unsigned char ch; - CHECK(stdimg->Read(&ch, sizeof(ch) != 0)); - img_[i][j][k] = ch; - } - } - } - // normalize to 0-1 - img_ *= 1.0f / 256.0f; - delete stdimg; - } - inline void LoadLabel(void) { - dmlc::Stream *stdlabel = dmlc::Stream::Create(param.path_label.c_str(), "r"); - ReadInt(stdlabel); - int labels_count = ReadInt(stdlabel); - labels_.resize(labels_count); - for (int i = 0; i < labels_count; ++i) { - unsigned char ch; - CHECK(stdlabel->Read(&ch, sizeof(ch) != 0)); - labels_[i] = ch; - inst_.push_back((unsigned)i + inst_offset_); - } - delete stdlabel; - } - inline void Shuffle(void) { - std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param.seed_data)); - std::vector tmplabel(labels_.size()); - mshadow::TensorContainer tmpimg(img_.shape_); - for (size_t i = 0; i < inst_.size(); ++i) { - unsigned ridx = inst_[i] - inst_offset_; - mshadow::Copy(tmpimg[i], img_[ridx]); - tmplabel[i] = labels_[ridx]; - } - // copy back - mshadow::Copy(img_, tmpimg); - labels_ = tmplabel; - } - - private: - inline static int ReadInt(dmlc::Stream *fi) { - unsigned char buf[4]; - CHECK(fi->Read(buf, sizeof(buf)) == sizeof(buf)) - << "invalid mnist format"; - return reinterpret_cast(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); - } - - private: - /*! \brief MNIST iter params */ - MNISTParam param; - /*! \brief output */ - DataBatch out_; - /*! \brief current location */ - index_t loc_; - /*! \brief image content */ - mshadow::Tensor img_; - /*! \brief label content */ - std::vector labels_; - /*! \brief batch data tensor */ - mshadow::Tensor batch_data_; - /*! \brief batch label tensor */ - mshadow::Tensor batch_label_; - /*! \brief instance index offset */ - unsigned inst_offset_; - /*! 
\brief instance index */ - std::vector inst_; - // magic number to setup randomness - static const int kRandMagic = 0; -}; // class MNISTIterator -} // namespace io -} // namespace mxnet -#endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 3ddda17a10af..b48064de184a 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,14 +4,196 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include "./iter_mnist-inl.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../common/utils.h" namespace mxnet { namespace io { +// Define mnist io parameters +struct MNISTParam : public dmlc::Parameter { + /*! \brief path */ + std::string image, label; + /*! \brief whether to do shuffle */ + bool shuffle; + /*! \brief whether to print info */ + bool silent; + /*! \brief batch size */ + int batch_size; + /*! \brief data mode */ + bool flat; + /*! \brief random seed */ + int seed; + // declare parameters in header file + DMLC_DECLARE_PARAMETER(MNISTParam) { + DMLC_DECLARE_FIELD(image).set_default("./train-images-idx3-ubyte") + .describe("Mnist image path."); + DMLC_DECLARE_FIELD(label).set_default("./train-labels-idx1-ubyte") + .describe("Mnist label path."); + DMLC_DECLARE_FIELD(shuffle).set_default(false) + .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to print out data info."); + DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) + .describe("Batch Size."); + DMLC_DECLARE_FIELD(flat).set_default(true) + .describe("Whether to flat the data into 1D."); + DMLC_DECLARE_FIELD(seed).set_default(0) + .describe("Random Seed."); + } +}; + +class MNISTIter: public IIterator { + public: + MNISTIter(void) { + img_.dptr_ = NULL; + inst_offset_ = 0; + out_.data.resize(2); + } + virtual ~MNISTIter(void) { + if (img_.dptr_ != NULL) delete []img_.dptr_; + } + // intialize iterator loads data 
in + virtual void Init(const std::vector >& kwargs) { + std::map kmap(kwargs.begin(), kwargs.end()); + param.Init(kmap); + this->LoadImage(); + this->LoadLabel(); + // set name + this->SetDataName(std::string("data")); + this->SetDataName(std::string("label")); + if (param.flat) { + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, 1, img_.size(1) * img_.size(2)); + } else { + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, img_.size(1), img_.size(2)); + } + out_.inst_index = NULL; + batch_label_.shape_ = mshadow::Shape2(param.batch_size, 1); + batch_label_.stride_ = 1; + batch_data_.stride_ = batch_data_.size(3); + out_.batch_size = param.batch_size; + if (param.shuffle) this->Shuffle(); + if (param.silent == 0) { + mshadow::Shape<4> s = batch_data_.shape_; + printf("MNISTIter: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", + (unsigned)img_.size(0), param.shuffle, s[0], s[1], s[2], s[3]); + } + } + virtual void BeforeFirst(void) { + this->loc_ = 0; + } + virtual bool Next(void) { + if (loc_ + param.batch_size <= img_.size(0)) { + batch_data_.dptr_ = img_[loc_].dptr_; + batch_label_.dptr_ = &labels_[loc_]; + out_.data[0] = TBlob(batch_data_); + out_.data[1] = TBlob(batch_label_); + out_.inst_index = &inst_[loc_]; + loc_ += param.batch_size; + return true; + } else { + return false; + } + } + virtual const DataBatch &Value(void) const { + return out_; + } + + private: + inline void LoadImage(void) { + dmlc::Stream *stdimg = dmlc::Stream::Create(param.image.c_str(), "r"); + ReadInt(stdimg); + int image_count = ReadInt(stdimg); + int image_rows = ReadInt(stdimg); + int image_cols = ReadInt(stdimg); + + img_.shape_ = mshadow::Shape3(image_count, image_rows, image_cols); + img_.stride_ = img_.size(2); + + // allocate continuous memory + img_.dptr_ = new float[img_.MSize()]; + for (int i = 0; i < image_count; ++i) { + for (int j = 0; j < image_rows; ++j) { + for (int k = 0; k < image_cols; ++k) { + unsigned char ch; + CHECK(stdimg->Read(&ch, 
sizeof(ch) != 0)); + img_[i][j][k] = ch; + } + } + } + // normalize to 0-1 + img_ *= 1.0f / 256.0f; + delete stdimg; + } + inline void LoadLabel(void) { + dmlc::Stream *stdlabel = dmlc::Stream::Create(param.label.c_str(), "r"); + ReadInt(stdlabel); + int labels_count = ReadInt(stdlabel); + labels_.resize(labels_count); + for (int i = 0; i < labels_count; ++i) { + unsigned char ch; + CHECK(stdlabel->Read(&ch, sizeof(ch) != 0)); + labels_[i] = ch; + inst_.push_back((unsigned)i + inst_offset_); + } + delete stdlabel; + } + inline void Shuffle(void) { + std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param.seed)); + std::vector tmplabel(labels_.size()); + mshadow::TensorContainer tmpimg(img_.shape_); + for (size_t i = 0; i < inst_.size(); ++i) { + unsigned ridx = inst_[i] - inst_offset_; + mshadow::Copy(tmpimg[i], img_[ridx]); + tmplabel[i] = labels_[ridx]; + } + // copy back + mshadow::Copy(img_, tmpimg); + labels_ = tmplabel; + } + + private: + inline static int ReadInt(dmlc::Stream *fi) { + unsigned char buf[4]; + CHECK(fi->Read(buf, sizeof(buf)) == sizeof(buf)) + << "invalid mnist format"; + return reinterpret_cast(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); + } + + private: + /*! \brief MNIST iter params */ + MNISTParam param; + /*! \brief output */ + DataBatch out_; + /*! \brief current location */ + index_t loc_; + /*! \brief image content */ + mshadow::Tensor img_; + /*! \brief label content */ + std::vector labels_; + /*! \brief batch data tensor */ + mshadow::Tensor batch_data_; + /*! \brief batch label tensor */ + mshadow::Tensor batch_label_; + /*! \brief instance index offset */ + unsigned inst_offset_; + /*! 
\brief instance index */ + std::vector inst_; + // magic number to setup randomness + static const int kRandMagic = 0; +}; // class MNISTIter DMLC_REGISTER_PARAMETER(MNISTParam); -MXNET_REGISTER_IO_ITER(MNISTIterator, MNISTIterator) - .describe("Create MNISTIterator") +MXNET_REGISTER_IO_ITER(MNISTIter, MNISTIter) + .describe("Create MNISTIter") .add_arguments(MNISTParam::__FIELDS__()); } // namespace io } // namespace mxnet From b6ebf945b1b07a981627a52ede84c39c3353f1a6 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Wed, 26 Aug 2015 11:59:30 +0800 Subject: [PATCH 28/61] add io.md in doc --- python/mxnet/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 5b6577c13369..31cd3c276ab2 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -48,7 +48,7 @@ def beforefirst(self): def next(self): """get next data from iterator - + Returns ------- labels and images for the next batch @@ -71,7 +71,7 @@ def iter_next(self): next_res = ctypes.c_int(0) check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) return next_res.value - + def getdata(self): """get data from batch From 6e1b7dd358966b83207487434e9ca295373dddc7 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Wed, 26 Aug 2015 22:54:08 +0800 Subject: [PATCH 29/61] merged latest master --- python/mxnet/io.py | 15 ++----- python/test_io.py | 20 ---------- python/test_mnist.py | 95 ++++++++++++++++++++++++++++++-------------- src/io/io.cc | 7 ---- 4 files changed, 70 insertions(+), 67 deletions(-) delete mode 100644 python/test_io.py diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 31cd3c276ab2..192f3f52b6c5 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -31,23 +31,16 @@ def __iter__(self): """make the class iterable """ - if len(args) != 0: - raise TypeError('data iterator only accept \ - keyword arguments') - num_args = len(kwargs) - keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) - vals = 
c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) - check_call(_LIB.MXDataIterSetInit( \ - self.handle, num_args, keys, vals)) - - def beforefirst(self): + return self + + def reset(self): """set loc to 0 """ check_call(_LIB.MXDataIterBeforeFirst(self.handle)) def next(self): - """get next data from iterator + """get next data batch from iterator Returns ------- diff --git a/python/test_io.py b/python/test_io.py deleted file mode 100644 index 940a4baa5c0d..000000000000 --- a/python/test_io.py +++ /dev/null @@ -1,20 +0,0 @@ -#pylint: skip-file -import mxnet as mx -import numpy as np -import os - -dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", - path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat") - -dataiter.beforefirst() - -idx = 0 -while dataiter.next(): - info = "Batch %d" % (idx) - idx += 1 - print info - ''' - label = dataiter.getlabel() - print label.numpy - ''' diff --git a/python/test_mnist.py b/python/test_mnist.py index b4e6db67465d..d58754c783ad 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -5,13 +5,52 @@ def CalAcc(out, label): pred = np.argmax(out, axis=1) - return np.sum(pred == label.transpose()) * 1.0 / out.shape[0] + return np.sum(pred == label) * 1.0 / out.shape[0] + + +# load data +class MNISTIter(object): + def __init__(self, which_set, batch_size=100, flatten=True): + if not os.path.exists('mnist.pkl.gz'): + os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") + f = gzip.open('mnist.pkl.gz', 'rb') + train_set, valid_set, test_set = cPickle.load(f) + f.close() + if which_set == 'train': + self.data = train_set[0] + self.label = np.asarray(train_set[1]) + elif which_set == 'valid': + self.data = valid_set[0] + self.label = np.asarray(valid_set[1]) + else: + self.data = test_set[0] + self.data = np.asarray(test_set[1]) + self.flatten = flatten + self.batch_size = batch_size + self.nbatch = 
self.data.shape[0] / batch_size + assert(self.data.shape[0] % batch_size == 0) # I am lazy + self.now_idx = -1 + def BeforeFirst(self): + self.now_idx = -1 + def Next(self): + self.now_idx += 1 + if self.now_idx == self.nbatch: + return False + return True + def Get(self): + if self.now_idx < 0: + raise Exception("Iterator is at head") + elif self.now_idx >= self.nbatch: + raise Exception("Iterator is at end") + start = self.now_idx * self.batch_size + end = (self.now_idx + 1) * self.batch_size + if self.flatten: + return (self.data[start:end, :], self.label[start:end]) + else: + return (self.data[start:end, :].reshape(batch_size, 1, 28, 28), + self.label[start:end]) + -def SetGradient(out_grad, label): - assert(out_grad.shape[0] == label.shape[0]) - for i in xrange(label.shape[0]): - k = label[i] - out_grad[i][k] -= 1.0 # symbol net batch_size = 100 @@ -24,8 +63,10 @@ def SetGradient(out_grad, label): softmax = mx.symbol.Softmax(data = fc2, name = 'sm') args_list = softmax.list_arguments() # infer shape -data_shape = (batch_size, 1, 1, 784) -arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) +#data_shape = (batch_size, 784) + +data_shape = (batch_size, 1, 28, 28) +arg_shapes, out_shapes = softmax.infer_shape(data=data_shape) arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] mom_narrays = [mx.narray.create(shape) for shape in arg_shapes] @@ -59,30 +100,23 @@ def Update(mom, grad, weight): block = zip(mom_narrays, grad_narrays, arg_narrays) -train_dataiter = mx.io.MNISTIter( - image="/home/tianjun/data/mnist/train-images-idx3-ubyte", - label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=0, flat=1, seed=1) -val_dataiter = mx.io.MNISTIter( - image="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", - label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=0, flat=1) +train = MNISTIter("train", batch_size, 
False) +valid = MNISTIter("valid", batch_size, False) for i in xrange(epoch): # train print "Epoch %d" % i train_acc = 0.0 val_acc = 0.0 - train_nbatch = 0 - val_nbatch = 0 - - for data, label in train_dataiter: - data = data.numpy - label = label.numpy.astype(np.int32) + while train.Next(): + data, label = train.Get() + print np.shape(data) + print np.shape(label) + exit(0) inputs["data"].numpy[:] = data + inputs["sm_label"].numpy[:] = label executor.forward() train_acc += CalAcc(out_narray.numpy, label) - train_nbatch += 1 grad_narray.numpy[:] = out_narray.numpy executor.backward([grad_narray]) @@ -90,12 +124,15 @@ def Update(mom, grad, weight): Update(mom, grad, weight) # evaluate - for data, label in val_dataiter: - data = data.numpy - label = label.numpy.astype(np.int32) + while valid.Next(): + data, label = valid.Get() inputs["data"].numpy[:] = data executor.forward() val_acc += CalAcc(out_narray.numpy, label) - val_nbatch += 1 - print "Train Acc: ", train_acc / train_nbatch - print "Valid Acc: ", val_acc / val_nbatch + print "Train Acc: ", train_acc / train.nbatch + print "Valid Acc: ", val_acc / valid.nbatch + train.BeforeFirst() + valid.BeforeFirst() + + + diff --git a/src/io/io.cc b/src/io/io.cc index fb7a8c2d3092..bd5b78dda643 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -3,14 +3,7 @@ #define _CRT_SECURE_NO_DEPRECATE #include -#include -#include #include -#include -#include -#include -#include -#include "iter_mnist-inl.h" namespace dmlc { DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); From 1b19f9d792189248f631985ef52d1b608096d0b9 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:40:01 +0800 Subject: [PATCH 30/61] finish merge remote master --- Makefile | 5 ++ include/mxnet/io.h | 105 +++++++++++++++++++++++ src/io/inst_vector.h | 117 ++++++++++++++++++++++++++ src/io/io.cc | 60 +++++++++++++ src/io/iter_mnist-inl.h | 181 ++++++++++++++++++++++++++++++++++++++++ test/io_mnist_test.cc | 96 +++++++++++++++++++++ 6 files changed, 564 
insertions(+) create mode 100644 include/mxnet/io.h create mode 100644 src/io/inst_vector.h create mode 100644 src/io/io.cc create mode 100644 src/io/iter_mnist-inl.h create mode 100644 test/io_mnist_test.cc diff --git a/Makefile b/Makefile index 8ebcfa896d62..ffb0524a853a 100644 --- a/Makefile +++ b/Makefile @@ -101,15 +101,20 @@ pooling_cpu.o: src/operator/pooling.cc pooling_gpu.o: src/operator/pooling.cu softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu +<<<<<<< HEAD convolution_cpu.o: src/operator/convolution.cc convolution_gpu.o: src/operator/convolution.cu reshape_cpu.o: src/operator/reshape.cc reshape_gpu.o: src/operator/reshape.cu +======= +io.o: src/io/io.cc +>>>>>>> finish merge remote master lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) test/test_storage: test/test_storage.cc lib/libmxnet.a +test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/include/mxnet/io.h b/include/mxnet/io.h new file mode 100644 index 000000000000..29dccbace770 --- /dev/null +++ b/include/mxnet/io.h @@ -0,0 +1,105 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file io.h + * \brief mxnet io data structure and data iterator + */ +#ifndef MXNET_IO_H_ +#define MXNET_IO_H_ +#include +#include +#include +#include +#include "./base.h" + +namespace mxnet { +/*! + * \brief iterator type + * \tparam DType data type + */ +template +class IIterator : public dmlc::DataIter { + public: + /*! + * \brief set the parameter + * \param name name of parameter + * \param val value of parameter + */ + virtual void SetParam(const char *name, const char *val) = 0; + /*! \brief initalize the iterator so that we can use the iterator */ + virtual void Init(void) = 0; + /*! \brief set before first of the item */ + virtual void BeforeFirst(void) = 0; + /*! 
\brief move to next item */ + virtual bool Next(void) = 0; + /*! \brief get current data */ + virtual const DType &Value(void) const = 0; + /*! \brief constructor */ + virtual ~IIterator(void) {} + /*! \brief store the name of each data, it could be used for making NArrays */ + std::vector data_names; + /*! \brief set data name to each attribute of data */ + inline void SetDataName(const std::string data_name){ + data_names.push_back(data_name); + } +}; // class IIterator + +/*! \brief a single data instance */ +struct DataInst { + /*! \brief unique id for instance */ + unsigned index; + /*! \brief content of data */ + std::vector data; + /*! \brief extra data to be fed to the network */ + std::string extra_data; +}; // struct DataInst + +/*! + * \brief a standard batch of data commonly used by iterator + * a databatch contains multiple TBlobs. Each Tblobs has + * a name stored in a map. There's no different between + * data and label, how we use them is to see the DNN implementation. + */ +struct DataBatch { + public: + /*! \brief unique id for instance, can be NULL, sometimes is useful */ + unsigned *inst_index; + /*! \brief number of instance */ + mshadow::index_t batch_size; + /*! \brief number of padding elements in this batch, + this is used to indicate the last elements in the batch are only padded up to match the batch, and should be discarded */ + mshadow::index_t num_batch_padd; + public: + /*! \brief content of dense data, if this DataBatch is dense */ + std::vector data; + /*! \brief extra data to be fed to the network */ + std::string extra_data; + public: + /*! \brief constructor */ + DataBatch(void) { + inst_index = NULL; + batch_size = 0; num_batch_padd = 0; + } + /*! \brief giving name to the data */ + void Naming(std::vector names); +}; // struct DataBatch + +/*! 
+ * \brief create the databatch iterator IIterator + * \param cfg configure settings key=vale pair + * \return the data IIterator ptr + */ +IIterator *CreateIterator(const std::vector > &cfg); +/*! + * \brief create the databatch iterator IIterator from config file + * \param cfg_path configure file path + * \return the data IIterator ptr + */ +IIterator *CreateIteratorFromConfig(const char* cfg_path); +/*! + * \brief create the databatch iterator IIterator by iter name + * \param iter_name can be mnist, imgrec and so on + * \return the data IIterator ptr + */ +IIterator *CreateIteratorByName(const char* iter_name); +} // namespace mxnet +#endif // MXNET_IO_H_ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h new file mode 100644 index 000000000000..1ae734631680 --- /dev/null +++ b/src/io/inst_vector.h @@ -0,0 +1,117 @@ +/*! + * Copyright (c) 2015 by Contributors + * \inst_vector.h + * \brief holder of a sequence of DataInst in CPU + * that are not necessarily of same shape + */ +#ifndef MXNET_IO_INST_VECTOR_H_ +#define MXNET_IO_INST_VECTOR_H_ +#include +#include +#include +#include +#include "./data.h" +namespace mxnet { +/*! 
+ * \brief tensor vector that can store sequence of tensor + * in a memory compact way, tensors do not have to be of same shape + */ +template +class TensorVector { + public: + TensorVector(void) { + this->Clear(); + } + // get i-th tensor + inline mshadow::Tensor + operator[](size_t i) const { + CHECK(i + 1 < offset_.size()); + CHECK(shape_[i].Size() == offset_[i + 1] - offset_[i]); + return mshadow::Tensor + (reinterpret_cast(BeginPtr(content_)) + offset_[i], shape_[i]); + } + inline mshadow::Tensor Back() const { + return (*this)[Size() - 1]; + } + inline size_t Size(void) const { + return shape_.size(); + } + // push a tensor of certain shape + // return the reference of the pushed tensor + inline void Push(mshadow::Shape shape) { + shape_.push_back(shape); + offset_.push_back(offset_.back() + shape.Size()); + content_.resize(offset_.back()); + } + inline void Clear(void) { + offset_.clear(); + offset_.push_back(0); + content_.clear(); + shape_.clear(); + } + + private: + // offset of the data content + std::vector offset_; + // data content + std::vector content_; + // shape of data + std::vector > shape_; +}; + +/*! + * \brief tblob vector that can store sequence of tblob + * in a memory compact way, tblobs do not have to be of same shape + */ +template +class TBlobVector { + public: + TBlobVector(void) { + this->Clear(); + } + // get i-th tblob + inline TBlob operator[](size_t i) const; + // get the last tblob + inline TBlob Back(); + // return the size of the vector + inline size_t Size(void) const; + // push a tensor of certain shape + // return the reference of the pushed tensor + inline void Push(TShape shape_); + inline void Clear(void); + private: + // offset of the data content + std::vector offset_; + // data content + std::vector content_; + // shape of data + std::vector shape_; +}; + +/*! 
+ * \brief instance vector that can holds + * non-uniform shape data instance in a shape efficient way + */ +class InstVector { + public: + inline size_t Size(void) const { + return index_.size(); + } + // instance + inline DataInst operator[](size_t i) const; + // get back of instance vector + inline DataInst Back() const; + // clear the container + inline void Clear(void); + // push the newly coming instance + inline void Push(unsigned index, TBlob data_); + + private: + /*! \brief index of the data */ + std::vector index_; + // data + std::vector > data_; + // extra data + std::vector extra_data_; +}; +#endif // MXNET_IO_INST_VECTOR_H_ diff --git a/src/io/io.cc b/src/io/io.cc new file mode 100644 index 000000000000..2df16e4fc209 --- /dev/null +++ b/src/io/io.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2015 by Contributors +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE + +#include +#include +#include +#include +#include +#include +#include +#include "iter_mnist-inl.h" +#include "../utils/random.h" + +namespace mxnet { + IIterator *CreateIterator( + const std::vector< std::pair > &cfg) { + size_t i = 0; + IIterator *it = NULL; + for (; i < cfg.size(); ++i) { + const char *name = cfg[i].first.c_str(); + const char *val = cfg[i].second.c_str(); + if (!strcmp(name, "iter")) { + if (!strcmp(val, "mnist")) { + CHECK(it == NULL) << "mnist cannot chain over other iterator"; + it = new MNISTIterator(); continue; + } + CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; + } + if (it != NULL) { + it->SetParam(name, val); + } + } + CHECK(it != NULL) << "must specify iterator by iter=itername"; + return it; + } + + IIterator *CreateIteratorFromConfig(const char* cfg_path) { + std::ifstream ifs(cfg_path, std::ifstream::in); + std::vector< std::pair< std::string, std::string> > itcfg; + dmlc::Config cfg(ifs); + for (dmlc::Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { + dmlc::Config::ConfigEntry ent = *iter; + 
itcfg.push_back(std::make_pair(ent.first, ent.second)); + } + // Get the data and init + return CreateIterator(itcfg); + } + + IIterator *CreateIteratorByName(const char* iter_name) { + IIterator *it = NULL; + // Currently only support mnist + if (!strcmp(iter_name, "mnist")) { + CHECK(it == NULL) << "mnist cannot chain over other iterator"; + it = new MNISTIterator(); + } + CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; + return it; + } +} // namespace mxnet diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h new file mode 100644 index 000000000000..376838fcf3f0 --- /dev/null +++ b/src/io/iter_mnist-inl.h @@ -0,0 +1,181 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file iter_mnist-inl.h + * \brief iterator that takes mnist dataset + */ +#ifndef MXNET_IO_ITER_MNIST_INL_H_ +#define MXNET_IO_ITER_MNIST_INL_H_ +#include +#include +#include +#include +#include +#include +#include +#include "../utils/random.h" + +namespace mxnet { +class MNISTIterator: public IIterator { + public: + MNISTIterator(void) { + img_.dptr_ = NULL; + mode_ = 1; + inst_offset_ = 0; + silent_ = 0; + shuffle_ = 0; + rnd.Seed(kRandMagic); + out_.data.resize(2); + } + virtual ~MNISTIterator(void) { + if (img_.dptr_ != NULL) delete []img_.dptr_; + } + virtual void SetParam(const char *name, const char *val) { + if (!strcmp(name, "silent")) silent_ = atoi(val); + if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); + if (!strcmp(name, "input_flat")) mode_ = atoi(val); + if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); + if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "path_label")) path_label = val; + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + } + // intialize iterator loads data in + virtual void Init(void) { + this->LoadImage(); + this->LoadLabel(); + // set name + 
this->SetDataName(std::string("data")); + this->SetDataName(std::string("label")); + if (mode_ == 1) { + batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, 1, img_.size(1) * img_.size(2)); + } else { + batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, img_.size(1), img_.size(2)); + } + out_.inst_index = NULL; + batch_label_.shape_ = mshadow::Shape2(batch_size_, 1); + batch_label_.stride_ = 1; + batch_data_.stride_ = batch_data_.size(3); + out_.batch_size = batch_size_; + if (shuffle_) this->Shuffle(); + if (silent_ == 0) { + mshadow::Shape<4> s = batch_data_.shape_; + printf("MNISTIterator: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", + (unsigned)img_.size(0), shuffle_, s[0], s[1], s[2], s[3]); + } + } + virtual void BeforeFirst(void) { + this->loc_ = 0; + } + virtual bool Next(void) { + if (loc_ + batch_size_ <= img_.size(0)) { + batch_data_.dptr_ = img_[loc_].dptr_; + batch_label_.dptr_ = &labels_[loc_]; + out_.data[0] = TBlob(batch_data_); + out_.data[1] = TBlob(batch_label_); + out_.inst_index = &inst_[loc_]; + loc_ += batch_size_; + return true; + } else { + return false; + } + } + virtual const DataBatch &Value(void) const { + return out_; + } + + private: + inline void LoadImage(void) { + dmlc::Stream *stdimg = dmlc::Stream::Create(path_img.c_str(), "r"); + ReadInt(stdimg); + int image_count = ReadInt(stdimg); + int image_rows = ReadInt(stdimg); + int image_cols = ReadInt(stdimg); + + img_.shape_ = mshadow::Shape3(image_count, image_rows, image_cols); + img_.stride_ = img_.size(2); + + // allocate continuous memory + img_.dptr_ = new float[img_.MSize()]; + for (int i = 0; i < image_count; ++i) { + for (int j = 0; j < image_rows; ++j) { + for (int k = 0; k < image_cols; ++k) { + unsigned char ch; + CHECK(stdimg->Read(&ch, sizeof(ch) != 0)); + img_[i][j][k] = ch; + } + } + } + // normalize to 0-1 + img_ *= 1.0f / 256.0f; + delete stdimg; + } + inline void LoadLabel(void) { + dmlc::Stream *stdlabel = dmlc::Stream::Create(path_label.c_str(), "r"); + 
ReadInt(stdlabel); + int labels_count = ReadInt(stdlabel); + labels_.resize(labels_count); + for (int i = 0; i < labels_count; ++i) { + unsigned char ch; + CHECK(stdlabel->Read(&ch, sizeof(ch) != 0)); + labels_[i] = ch; + inst_.push_back((unsigned)i + inst_offset_); + } + delete stdlabel; + } + inline void Shuffle(void) { + rnd.Shuffle(&inst_); + std::vector tmplabel(labels_.size()); + mshadow::TensorContainer tmpimg(img_.shape_); + for (size_t i = 0; i < inst_.size(); ++i) { + unsigned ridx = inst_[i] - inst_offset_; + mshadow::Copy(tmpimg[i], img_[ridx]); + tmplabel[i] = labels_[ridx]; + } + // copy back + mshadow::Copy(img_, tmpimg); + labels_ = tmplabel; + } + + private: + inline static int ReadInt(dmlc::Stream *fi) { + unsigned char buf[4]; + CHECK(fi->Read(buf, sizeof(buf)) == sizeof(buf)) + << "invalid mnist format"; + return reinterpret_cast(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); + } + + private: + /*! \brief silent */ + int silent_; + /*! \brief path */ + std::string path_img, path_label; + /*! \brief output */ + DataBatch out_; + /*! \brief whether do shuffle */ + int shuffle_; + /*! \brief data mode */ + int mode_; + /*! \brief current location */ + index_t loc_; + /*! \brief batch size */ + index_t batch_size_; + /*! \brief image content */ + mshadow::Tensor img_; + /*! \brief label content */ + std::vector labels_; + /*! \brief batch data tensor */ + mshadow::Tensor batch_data_; + /*! \brief batch label tensor */ + mshadow::Tensor batch_label_; + /*! \brief instance index offset */ + unsigned inst_offset_; + /*! 
\brief instance index */ + std::vector inst_; + // random sampler + utils::RandomSampler rnd; + // magic number to setup randomness + static const int kRandMagic = 0; +}; // class MNISTIterator +} // namespace mxnet +#endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/test/io_mnist_test.cc b/test/io_mnist_test.cc new file mode 100644 index 000000000000..2bfba24a507a --- /dev/null +++ b/test/io_mnist_test.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2015 by Contributors +// IO test code + +#include +#include +#include +#include +#include +#include "mxnet/io.h" +#include "../src/io/iter_mnist-inl.h" + +using namespace std; +using namespace mxnet; +using namespace dmlc; + +void InitIter(IIterator* itr, + const std::vector< std::pair< std::string, std::string> > &defcfg) { + for (size_t i = 0; i < defcfg.size(); ++i) { + itr->SetParam(defcfg[i].first.c_str(), defcfg[i].second.c_str()); + } + itr->Init(); +} + +IIterator* CreateIterators( + const std::vector< std::pair< std::string, std::string> >& cfg) { + IIterator* data_itr = NULL; + int flag = 0; + std::string evname; + std::vector< std::pair< std::string, std::string> > itcfg; + std::vector< std::pair< std::string, std::string> > defcfg; + for (size_t i = 0; i < cfg.size(); ++i) { + const char *name = cfg[i].first.c_str(); + const char *val = cfg[i].second.c_str(); + if (!strcmp(name, "data")) { + flag = 1; continue; + } + if (!strcmp(name, "eval")) { + flag = 2; continue; + } + if (!strcmp(name, "pred")) { + flag = 3; continue; + } + if (!strcmp(name, "iterend") && !strcmp(val, "true")) { + if (flag == 1) { + data_itr = mxnet::CreateIterator(itcfg); + } + flag = 0; itcfg.clear(); + } + if (flag == 0) { + defcfg.push_back(cfg[i]); + } else { + itcfg.push_back(cfg[i]); + } + } + if (data_itr != NULL) { + InitIter(data_itr, defcfg); + } + return data_itr; +} + +/*! 
+ * Usage: ./io_mnist_test /path/to/io_config/file + * Example + * data = train + * iter = mnist + * path_img = "./data/mnist/train-images-idx3-ubyte" + * path_label = "./data/mnist/train-labels-idx1-ubyte" + * shuffle = 1 + * iterend = true + * input_shape = 1,1,784 + * batch_size = 100 + * + */ + +int main(int argc, char** argv) { + std::ifstream ifs(argv[1], std::ifstream::in); + std::vector< std::pair< std::string, std::string> > itcfg; + Config cfg(ifs); + for (Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { + Config::ConfigEntry ent = *iter; + itcfg.push_back(std::make_pair(ent.first, ent.second)); + } + // Get the data and init + IIterator* data_itr = CreateIterators(itcfg); + data_itr->BeforeFirst(); + int batch_dir = 0; + while (data_itr->Next()) { + std::cout << "Label of Batch " << batch_dir++ << std::endl; + // print label + DataBatch db = data_itr->Value(); + mshadow::Tensor label = db.data[1].get(); + for (size_t i = 0; i < label.shape_.shape_[0]; i++) + std::cout << label.dptr_[i] << " "; + std::cout << "\n"; + } +} From 0c24dbfd92a81cd8b8386e3843bf1c818f4bddf5 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sat, 22 Aug 2015 13:05:57 +0800 Subject: [PATCH 31/61] built in python, start polishing new feature required --- Makefile | 4 +-- include/mxnet/c_api.h | 23 ++++++++++++ python/mxnet/base.py | 2 +- python/mxnet/io.py | 84 +++++++++++++++++++++++++++++++++++++++++++ python/test_io.py | 22 ++++++++++++ src/c_api.cc | 51 ++++++++++++++++++++++++++ 6 files changed, 183 insertions(+), 3 deletions(-) create mode 100644 python/mxnet/io.py create mode 100644 python/test_io.py diff --git a/Makefile b/Makefile index ffb0524a853a..428d7b5bccc3 100644 --- a/Makefile +++ b/Makefile @@ -110,8 +110,8 @@ reshape_gpu.o: src/operator/reshape.cu io.o: src/io/io.cc >>>>>>> finish merge remote master -lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) -lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) +lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) 
+lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 5802c32cf75c..f1119b3323c5 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -460,6 +460,29 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, */ MXNET_DLL int MXIOCreateFromConfig(const char *cfg, DataIterHandle *out); +/*! + * \brief create an data iterator by name + * \param iter_name iterator name + * \param out the handle to the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOCreateByName(const char *iter_name, + DataIterHandle *out); +/*! + * \brief set parameter value + * \param handle the handle to iterator + * \param name parameter name + * \param val parameter value + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOSetParam(DataIterHandle handle, + const char *name, const char *val); +/*! + * \brief Init after set parameter + * \param handle the handle to iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOInit(DataIterHandle handle); /*! 
* \brief move iterator to next position * \param handle the handle to iterator diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 6cf8c616f805..744ef46ba9f2 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -75,7 +75,7 @@ def _load_lib(): SymbolCreatorHandle = ctypes.c_void_p SymbolHandle = ctypes.c_void_p ExecutorHandle = ctypes.c_void_p - +DataIterHandle = ctypes.c_void_p #---------------------------- # helper function definition #---------------------------- diff --git a/python/mxnet/io.py b/python/mxnet/io.py new file mode 100644 index 000000000000..96e4938a79b3 --- /dev/null +++ b/python/mxnet/io.py @@ -0,0 +1,84 @@ +# coding: utf-8 + +"""NArray interface of mxnet""" +from __future__ import absolute_import + +import ctypes +from .base import _LIB +from .base import DataIterHandle, NArrayHandle +from .base import check_call +from .narray import NArray + +class DataIter(object): + """DataIter object in mxnet + + DataIter is a wrapper for C++ DataIter functions + """ + + def __init__(self): + """initialize a new dataiter + + """ + self._datahandle = None + + def createfromcfg(self, cfg_path): + """create a dataiter from config file + + cfg_path is the path of configure file + """ + hdl = DataIterHandle() + check_call(_LIB.MXIOCreateFromConfig(ctypes.c_char_p(cfg_path), ctypes.byref(hdl))) + self._datahandle = hdl + + def createbyname(self, iter_name): + """create a dataiter by the name + + iter_name can be mnist imgrec or so on + """ + hdl = DataIterHandle() + check_call(_LIB.MXIOCreateByName(ctypes.c_char_p(iter_name), ctypes.byref(hdl))) + self._datahandle = hdl + + def setparam(self, name, val): + """set param value for dataiter + + name prameter name + val parameter value + """ + check_call(_LIB.MXIOSetParam(self._datahandle, ctypes.c_char_p(name), ctypes.c_char_p(val))) + + def init(self): + """init dataiter + + """ + check_call(_LIB.MXIOInit(self._datahandle)) + + def beforefirst(self): + """set loc to 0 + + """ + 
check_call(_LIB.MXIOBeforeFirst(self._datahandle)) + + def next(self): + """init dataiter + + """ + next_res = ctypes.c_int(0) + check_call(_LIB.MXIONext(self._datahandle, ctypes.byref(next_res))) + return next_res.value + + def getdata(self): + """get data from batch + + """ + hdl = NArrayHandle() + check_call(_LIB.MXIOGetData(self._datahandle, ctypes.byref(hdl))) + return NArray(hdl) + + def getlabel(self): + """get label from batch + + """ + hdl = NArrayHandle() + check_call(_LIB.MXIOGetLabel(self._datahandle, ctypes.byref(hdl))) + return NArray(hdl) diff --git a/python/test_io.py b/python/test_io.py new file mode 100644 index 000000000000..6909176d11c2 --- /dev/null +++ b/python/test_io.py @@ -0,0 +1,22 @@ +#pylint: skip-file +import mxnet as mx + +dataiter = mx.io.DataIter() +#a.createfromcfg('/home/tianjun/mxnet/mxnet/MNIST.conf') +dataiter.createbyname('mnist') +dataiter.setparam('path_img', "/home/tianjun/data/mnist/train-images-idx3-ubyte") +dataiter.setparam('path_label', "/home/tianjun/data/mnist/train-labels-idx1-ubyte") +dataiter.setparam('shuffle', '1') +dataiter.setparam('seed_data', '2') +dataiter.setparam('batch_size', '100') + +dataiter.init() + +dataiter.beforefirst() + +for i in range(100): + dataiter.next() + info = "Batch %d" % (i) + print info + label = dataiter.getdata() + print label.numpy diff --git a/src/c_api.cc b/src/c_api.cc index b251ba578743..0e378f49057f 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -609,3 +610,53 @@ int MXExecutorBind(SymbolHandle symbol_handle, *out = Executor::Bind(*symb, ctx, in_args_vec, arg_grad_vec, grad_req_vec); API_END(); } + +int MXIOCreateFromConfig(const char *cfg, DataIterHandle *out) { + API_BEGIN(); + *out = static_cast(CreateIteratorFromConfig(cfg)); + API_END(); +} + +int MXIOCreateByName(const char *iter_name, DataIterHandle *out) { + API_BEGIN(); + *out = static_cast(CreateIteratorByName(iter_name)); + API_END(); +} 
+ +int MXIOSetParam(DataIterHandle handle, const char *name, const char *val) { + API_BEGIN(); + static_cast* >(handle)->SetParam(name, val); + API_END(); +} + +int MXIOInit(DataIterHandle handle) { + API_BEGIN(); + static_cast* >(handle)->Init(); + API_END(); +} + +int MXIOBeforeFirst(DataIterHandle handle) { + API_BEGIN(); + static_cast* >(handle)->BeforeFirst(); + API_END(); +} + +int MXIONext(DataIterHandle handle, int *out) { + API_BEGIN(); + *out = static_cast* >(handle)->Next(); + API_END(); +} + +int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out) { + API_BEGIN(); + DataBatch db = static_cast* >(handle)->Value(); + *out = new NArray(db.data[1], 0); + API_END(); +} + +int MXIOGetData(DataIterHandle handle, NArrayHandle *out) { + API_BEGIN(); + DataBatch db = static_cast* >(handle)->Value(); + *out = new NArray(db.data[0], 0); + API_END(); +} From a2f03a792781f3fae27375d820edd5305484f9af Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:28:47 +0800 Subject: [PATCH 32/61] finish old version registry in C --- Makefile | 8 +++- include/mxnet/c_api.h | 34 +++++++++++++++++ include/mxnet/io.h | 5 +++ python/mxnet/base.py | 1 + src/c_api.cc | 39 +++++++++++++++++++ src/io/io.cc | 4 +- src/io/iter_mnist-inl.h | 83 ++++++++++++++++++++++++----------------- src/io/iter_mnist.cc | 17 +++++++++ 8 files changed, 152 insertions(+), 39 deletions(-) create mode 100644 src/io/iter_mnist.cc diff --git a/Makefile b/Makefile index 428d7b5bccc3..4318d384cff1 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o +OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o 
fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o io.o iter-mnist.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -108,13 +108,17 @@ reshape_cpu.o: src/operator/reshape.cc reshape_gpu.o: src/operator/reshape.cu ======= io.o: src/io/io.cc +<<<<<<< HEAD >>>>>>> finish merge remote master +======= +iter_mnist.o: src/io/iter_mnist.cc +>>>>>>> finish old version registry in C lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a -test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a +#test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a #test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index f1119b3323c5..88a2934bbbdf 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -36,6 +36,8 @@ typedef void *SymbolHandle; typedef void *AtomicSymbolHandle; /*! \brief handle to an Executor */ typedef void *ExecutorHandle; +/*! \brief handle a dataiter creator */ +typedef void *DataIterCreator; /*! \brief handle to a DataIterator */ typedef void *DataIterHandle; /*! @@ -519,5 +521,37 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, */ MXNET_DLL int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out); +/*! + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array); +/*! 
+ * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOIterGetName(DataIterCreator iter, + const char **out_name); +/*! + * \brief create an iterator, init with parameters + * the array size of passed in arguments + * \param creator IOIterator Enrty + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); + #endif // MXNET_C_API_H_ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 29dccbace770..16c86138abe1 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -25,6 +25,11 @@ class IIterator : public dmlc::DataIter { * \param val value of parameter */ virtual void SetParam(const char *name, const char *val) = 0; + /*! + * \brief init the parameter + * \param kwargs key-value pairs + */ + virtual void InitParams(const std::vector >& kwargs) = 0; /*! \brief initalize the iterator so that we can use the iterator */ virtual void Init(void) = 0; /*! 
\brief set before first of the item */ diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 744ef46ba9f2..ec9d43dc58aa 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -75,6 +75,7 @@ def _load_lib(): SymbolCreatorHandle = ctypes.c_void_p SymbolHandle = ctypes.c_void_p ExecutorHandle = ctypes.c_void_p +DataIterCreatorHandle = ctypes.c_void_p DataIterHandle = ctypes.c_void_p #---------------------------- # helper function definition diff --git a/src/c_api.cc b/src/c_api.cc index 0e378f49057f..ea095ddfb5c7 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -660,3 +661,41 @@ int MXIOGetData(DataIterHandle handle, NArrayHandle *out) { *out = new NArray(db.data[0], 0); API_END(); } + +int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array) { + API_BEGIN(); + auto &vec = Registry::List(); + *out_size = static_cast(vec.size()); + *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) + API_END(); +} + +int MXIOIterGetName(DataIterCreator iter, + const char **out_name) { + API_BEGIN(); + auto *f = static_cast(iter); + *out_name = f->name.c_str(); + API_END(); +} + +int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out) { + IOIteratorEntry *e = static_cast(creator); + IIterator *iter = (*e)(); + API_BEGIN(); + std::vector > kwargs; + for (int i = 0; i < num_param; ++i) { + kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); + } + iter->InitParams(kwargs); + *out = iter; + API_END_HANDLE_ERROR(delete iter); +} + + + + diff --git a/src/io/io.cc b/src/io/io.cc index 2df16e4fc209..60c013a812a5 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -23,7 +23,7 @@ namespace mxnet { if (!strcmp(name, "iter")) { if (!strcmp(val, "mnist")) { CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new MNISTIterator(); continue; + it = new io::MNISTIterator(); 
continue; } CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; } @@ -52,7 +52,7 @@ namespace mxnet { // Currently only support mnist if (!strcmp(iter_name, "mnist")) { CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new MNISTIterator(); + it = new io::MNISTIterator(); } CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; return it; diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 376838fcf3f0..62168f8f1811 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -10,19 +10,42 @@ #include #include #include +#include #include #include #include "../utils/random.h" namespace mxnet { +namespace io { +// Define mnist io parameters +struct MNISTParam : public dmlc::Parameter { + /*! \brief path */ + std::string path_img, path_label; + /*! \brief whether to do shuffle */ + bool shuffle; + /*! \brief whether to print info */ + bool silent; + /*! \brief batch size */ + int batch_size; + /*! \brief data mode */ + int input_flat; + // declare parameters in header file + DMLC_DECLARE_PARAMETER(Param) { + DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte"); + DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte"); + DMLC_DECLARE_FIELD(shuffle).set_default(false); + DMLC_DECLARE_FIELD(silent).set_default(false); + DMLC_DECLARE_FIELD(batch_size).set_default(128); + DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) + .add_enum("noflat", 0).set_default(1); + } +}; + class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; - mode_ = 1; inst_offset_ = 0; - silent_ = 0; - shuffle_ = 0; rnd.Seed(kRandMagic); out_.data.resize(2); } @@ -30,15 +53,9 @@ class MNISTIterator: public IIterator { if (img_.dptr_ != NULL) delete []img_.dptr_; } virtual void SetParam(const char *name, const char *val) { - if (!strcmp(name, "silent")) silent_ = atoi(val); - if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); - if 
(!strcmp(name, "input_flat")) mode_ = atoi(val); - if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); - if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "path_label")) path_label = val; - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + std::map kwargs; + kwargs[name] = val; + param.Init(kwargs); } // intialize iterator loads data in virtual void Init(void) { @@ -47,34 +64,34 @@ class MNISTIterator: public IIterator { // set name this->SetDataName(std::string("data")); this->SetDataName(std::string("label")); - if (mode_ == 1) { - batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, 1, img_.size(1) * img_.size(2)); + if (param.input_flat == 1) { + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, 1, img_.size(1) * img_.size(2)); } else { - batch_data_.shape_ = mshadow::Shape4(batch_size_, 1, img_.size(1), img_.size(2)); + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, img_.size(1), img_.size(2)); } out_.inst_index = NULL; - batch_label_.shape_ = mshadow::Shape2(batch_size_, 1); + batch_label_.shape_ = mshadow::Shape2(param.batch_size, 1); batch_label_.stride_ = 1; batch_data_.stride_ = batch_data_.size(3); - out_.batch_size = batch_size_; - if (shuffle_) this->Shuffle(); - if (silent_ == 0) { + out_.batch_size = param.batch_size; + if (param.shuffle) this->Shuffle(); + if (param.silent == 0) { mshadow::Shape<4> s = batch_data_.shape_; printf("MNISTIterator: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", - (unsigned)img_.size(0), shuffle_, s[0], s[1], s[2], s[3]); + (unsigned)img_.size(0), param.shuffle, s[0], s[1], s[2], s[3]); } } virtual void BeforeFirst(void) { this->loc_ = 0; } virtual bool Next(void) { - if (loc_ + batch_size_ <= img_.size(0)) { + if (loc_ + param.batch_size <= img_.size(0)) { batch_data_.dptr_ = img_[loc_].dptr_; batch_label_.dptr_ = &labels_[loc_]; out_.data[0] = 
TBlob(batch_data_); out_.data[1] = TBlob(batch_label_); out_.inst_index = &inst_[loc_]; - loc_ += batch_size_; + loc_ += param.batch_size; return true; } else { return false; @@ -83,10 +100,13 @@ class MNISTIterator: public IIterator { virtual const DataBatch &Value(void) const { return out_; } - + virtual void InitParams(const std::vector >& kwargs) { + std::map kmap(kwargs.begin(), kwargs.end()); + param.Init(kmap); + } private: inline void LoadImage(void) { - dmlc::Stream *stdimg = dmlc::Stream::Create(path_img.c_str(), "r"); + dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); ReadInt(stdimg); int image_count = ReadInt(stdimg); int image_rows = ReadInt(stdimg); @@ -111,7 +131,7 @@ class MNISTIterator: public IIterator { delete stdimg; } inline void LoadLabel(void) { - dmlc::Stream *stdlabel = dmlc::Stream::Create(path_label.c_str(), "r"); + dmlc::Stream *stdlabel = dmlc::Stream::Create(param.path_label.c_str(), "r"); ReadInt(stdlabel); int labels_count = ReadInt(stdlabel); labels_.resize(labels_count); @@ -146,20 +166,12 @@ class MNISTIterator: public IIterator { } private: - /*! \brief silent */ - int silent_; - /*! \brief path */ - std::string path_img, path_label; + /*! \brief MNIST iter params */ + MNISTParam param; /*! \brief output */ DataBatch out_; - /*! \brief whether do shuffle */ - int shuffle_; - /*! \brief data mode */ - int mode_; /*! \brief current location */ index_t loc_; - /*! \brief batch size */ - index_t batch_size_; /*! \brief image content */ mshadow::Tensor img_; /*! \brief label content */ @@ -177,5 +189,6 @@ class MNISTIterator: public IIterator { // magic number to setup randomness static const int kRandMagic = 0; }; // class MNISTIterator +} // namespace io } // namespace mxnet #endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc new file mode 100644 index 000000000000..942398749378 --- /dev/null +++ b/src/io/iter_mnist.cc @@ -0,0 +1,17 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file iter_mnist.cc + * \brief register mnist iterator + * \author Tianjun Xiao +*/ +#include +#include "./iter_mnist-inl.h" + +namespace mxnet { +namespace io { + +DMLC_REGISTER_PARAMETER(MNISTParam); +REGISTER_IO_ITER(mnist, MNISTIterator); + +} // namespace io +} // namespace mxnet From 9d4d98e05fd966ad4c82a2f944670e4e568fdbc1 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 23:21:49 +0800 Subject: [PATCH 33/61] modify to dmlc registry --- include/mxnet/io.h | 30 ++++++++++++++++++++++++++++++ python/mxnet/io.py | 25 ++++++++++++++++++++++--- src/c_api.cc | 13 +++++++++---- src/io/io.cc | 5 +++++ src/io/iter_mnist-inl.h | 20 +++++++++++++------- src/io/iter_mnist.cc | 5 +++-- 6 files changed, 82 insertions(+), 16 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 16c86138abe1..600978023b5b 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -6,6 +6,7 @@ #ifndef MXNET_IO_H_ #define MXNET_IO_H_ #include +#include #include #include #include @@ -106,5 +107,34 @@ IIterator *CreateIteratorFromConfig(const char* cfg_path); * \return the data IIterator ptr */ IIterator *CreateIteratorByName(const char* iter_name); + +/*! \brief typedef the factory function of data iterator */ +typedef IIterator *(*DataIteratorFactory)(); +/*! + * \brief Registry entry for DataIterator factory functions. + */ +struct DataIteratorReg + : public dmlc::FunctionRegEntryBase { +}; +//-------------------------------------------------------------- +// The following part are API Registration of Iterators +//-------------------------------------------------------------- +/*! 
+ * \brief Macro to register Iterators + * + * \code + * // example of registering a mnist iterator + * REGISTER_IO_ITERATOR(MNIST, MNISTIterator) + * .describe("Mnist data iterator"); + * + * \endcode + */ +#define MXNET_REGISTER_IO_ITER(name, DataIteratorType) \ + static ::mxnet::IIterator* __create__ ## DataIteratorType ## __() { \ + return new DataIteratorType; \ + } \ + DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name) \ + .set_body(__create__ ## DataIteratorType ## __) } // namespace mxnet #endif // MXNET_IO_H_ diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 96e4938a79b3..ead49f07c4fd 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -5,16 +5,35 @@ import ctypes from .base import _LIB +from .base import c_array, c_str, mx_uint, string_types from .base import DataIterHandle, NArrayHandle from .base import check_call from .narray import NArray class DataIter(object): - """DataIter object in mxnet + """DataIter object in mxnet. List all the needed functions here. 
""" - DataIter is a wrapper for C++ DataIter functions - """ + def __init__(self, handle): + """Initialize with handle + Parameters + ---------- + handle : DataIterHandle + the handle to the underlying C++ Data Iterator + """ + self.handle = handle + + def __del__(self): + check_call(_LIB.MXDataIterFree(self.handle)) + + + + + + + + + def __init__(self): """initialize a new dataiter diff --git a/src/c_api.cc b/src/c_api.cc index ea095ddfb5c7..4155e1ca6f9a 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -665,7 +665,7 @@ int MXIOGetData(DataIterHandle handle, NArrayHandle *out) { int MXListIOIters(mx_uint *out_size, DataIterCreator **out_array) { API_BEGIN(); - auto &vec = Registry::List(); + auto &vec = dmlc::Registry::List(); *out_size = static_cast(vec.size()); *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) API_END(); @@ -674,7 +674,7 @@ int MXListIOIters(mx_uint *out_size, int MXIOIterGetName(DataIterCreator iter, const char **out_name) { API_BEGIN(); - auto *f = static_cast(iter); + auto *f = static_cast(iter); *out_name = f->name.c_str(); API_END(); } @@ -684,8 +684,8 @@ int MXCreateIOIterator(DataIterCreator creator, const char **keys, const char **vals, DataIterHandle *out) { - IOIteratorEntry *e = static_cast(creator); - IIterator *iter = (*e)(); + DataIteratorReg *e = static_cast(creator); + IIterator *iter = e->body(); API_BEGIN(); std::vector > kwargs; for (int i = 0; i < num_param; ++i) { @@ -696,6 +696,11 @@ int MXCreateIOIterator(DataIterCreator creator, API_END_HANDLE_ERROR(delete iter); } +int MXDataIterFree(DataIterHandle iter) { + API_BEGIN(); + delete static_cast *>(symbol); + API_END(); +} diff --git a/src/io/io.cc b/src/io/io.cc index 60c013a812a5..aafe85073a52 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,10 @@ #include "iter_mnist-inl.h" #include "../utils/random.h" +namespace dmlc { +DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); 
+} // namespace dmlc + namespace mxnet { IIterator *CreateIterator( const std::vector< std::pair > &cfg) { diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 62168f8f1811..6a705c483e5b 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -30,14 +30,20 @@ struct MNISTParam : public dmlc::Parameter { /*! \brief data mode */ int input_flat; // declare parameters in header file - DMLC_DECLARE_PARAMETER(Param) { - DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte"); - DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte"); - DMLC_DECLARE_FIELD(shuffle).set_default(false); - DMLC_DECLARE_FIELD(silent).set_default(false); - DMLC_DECLARE_FIELD(batch_size).set_default(128); + DMLC_DECLARE_PARAMETER(MNISTParam) { + DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") + .describe("Mnist image path."); + DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") + .describe("Mnist label path."); + DMLC_DECLARE_FIELD(shuffle).set_default(false) + .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to print out data info."); + DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) + .describe("Batch Size."); DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) - .add_enum("noflat", 0).set_default(1); + .add_enum("noflat", 0).set_default(1) + .describe("Whether to flat the data into 1D."); } }; diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 942398749378..c6fab8d376d7 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,14 +4,15 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include #include "./iter_mnist-inl.h" namespace mxnet { namespace io { DMLC_REGISTER_PARAMETER(MNISTParam); -REGISTER_IO_ITER(mnist, MNISTIterator); +MXNET_REGISTER_IO_ITER(MNIST, MNISTIterator) + .describe("Create MNISTIterator") + .add_arguments(MNISTParam::__FIELDS__()); } // namespace io } // 
namespace mxnet From 15442583998e6ec5ee981d05e05628b0badfe02a Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 03:20:11 +0800 Subject: [PATCH 34/61] pass python mnist test, begin cleaning --- include/mxnet/c_api.h | 110 +++++++++++--------------- include/mxnet/io.h | 4 +- python/mxnet/__init__.py | 1 + python/mxnet/io.py | 166 ++++++++++++++++++++++++++++----------- python/test_io.py | 27 +++---- python/test_mnist.py | 113 ++++++++++++++++++++++++++ src/c_api.cc | 99 +++++++++++------------ src/io/iter_mnist-inl.h | 3 +- src/io/iter_mnist.cc | 2 +- 9 files changed, 343 insertions(+), 182 deletions(-) create mode 100644 python/test_mnist.py diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 88a2934bbbdf..631e0c032852 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -454,64 +454,82 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, // Part 5: IO Interface //-------------------------------------------- /*! - * \brief create an data iterator from configs string - * \param cfg config string that contains the - * configuration about the iterator - * \param out the handle to the iterator + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateFromConfig(const char *cfg, - DataIterHandle *out); +MXNET_DLL int MXListDataIters(mx_uint *out_size, + DataIterCreator **out_array); /*! - * \brief create an data iterator by name - * \param iter_name iterator name - * \param out the handle to the iterator + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateByName(const char *iter_name, - DataIterHandle *out); +MXNET_DLL int MXDataIterGetName(DataIterCreator iter, + const char **out_name); /*! 
- * \brief set parameter value - * \param handle the handle to iterator - * \param name parameter name - * \param val parameter value + * \brief init an iterator, init with parameters + * the array size of passed in arguments + * \param handle of the iterator creator + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out resulting iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOSetParam(DataIterHandle handle, - const char *name, const char *val); +MXNET_DLL int MXDataIterCreateIter(DataIterCreator handle, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); /*! - * \brief Init after set parameter - * \param handle the handle to iterator + * \brief Get the detailed information about data iterator. + * \param creator the DataIterCreator. + * \param name The returned name of the creator. + * \param description The returned description of the symbol. + * \param num_args Number of arguments. + * \param arg_names Name of the arguments. + * \param arg_type_infos Type informations about the arguments. + * \param arg_descriptions Description information about the arguments. * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOInit(DataIterHandle handle); +MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions); +/*! + * \brief free the handle to the IO module + * \param handle the handle pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! 
* \brief move iterator to next position * \param handle the handle to iterator * \param out return value of next * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIONext(DataIterHandle handle, +MXNET_DLL int MXDataIterNext(DataIterHandle handle, int *out); /*! * \brief call iterator.BeforeFirst * \param handle the handle to iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOBeforeFirst(DataIterHandle handle); -/*! - * \brief free the handle to the IO module - * \param handle the handle pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOFree(DataIterHandle handle); +MXNET_DLL int MXDataIterBeforeFirst(DataIterHandle handle); + /*! * \brief get the handle to the NArray of underlying data * \param handle the handle pointer to the data iterator * \param out handle to underlying data NArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOGetData(DataIterHandle handle, +MXNET_DLL int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out); /*! * \brief get the handle to the NArray of underlying label @@ -519,39 +537,7 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, * \param out the handle to underlying label NArray * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOGetLabel(DataIterHandle handle, +MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); -/*! - * \brief list all the available iterator entries - * \param out_size the size of returned iterators - * \param out_array the output iteratos entries - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array); -/*! 
- * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOIterGetName(DataIterCreator iter, - const char **out_name); -/*! - * \brief create an iterator, init with parameters - * the array size of passed in arguments - * \param creator IOIterator Enrty - * \param num_param number of parameter - * \param keys parameter keys - * \param vals parameter values - * \param out pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - const char **vals, - DataIterHandle *out); - #endif // MXNET_C_API_H_ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 600978023b5b..4ca5ed05fd18 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -27,10 +27,10 @@ class IIterator : public dmlc::DataIter { */ virtual void SetParam(const char *name, const char *val) = 0; /*! - * \brief init the parameter + * \brief set the parameters and init iter * \param kwargs key-value pairs */ - virtual void InitParams(const std::vector >& kwargs) = 0; + virtual void SetInit(const std::vector >& kwargs) = 0; /*! \brief initalize the iterator so that we can use the iterator */ virtual void Init(void) = 0; /*! \brief set before first of the item */ diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index c7720dcbd935..a8632bfa2ff8 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -12,6 +12,7 @@ from .base import MXNetError from . import narray from . import symbol +from . 
import io __version__ = "0.1.0" diff --git a/python/mxnet/io.py b/python/mxnet/io.py index ead49f07c4fd..baee99a02d61 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -4,6 +4,7 @@ from __future__ import absolute_import import ctypes +import sys from .base import _LIB from .base import c_array, c_str, mx_uint, string_types from .base import DataIterHandle, NArrayHandle @@ -25,65 +26,42 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) + + def __call__(self, *args, **kwargs): + """Invoke iterator as function on inputs. Init params. - - - - - - - - - def __init__(self): - """initialize a new dataiter - - """ - self._datahandle = None - - def createfromcfg(self, cfg_path): - """create a dataiter from config file - - cfg_path is the path of configure file - """ - hdl = DataIterHandle() - check_call(_LIB.MXIOCreateFromConfig(ctypes.c_char_p(cfg_path), ctypes.byref(hdl))) - self._datahandle = hdl - - def createbyname(self, iter_name): - """create a dataiter by the name - - iter_name can be mnist imgrec or so on - """ - hdl = DataIterHandle() - check_call(_LIB.MXIOCreateByName(ctypes.c_char_p(iter_name), ctypes.byref(hdl))) - self._datahandle = hdl - - def setparam(self, name, val): - """set param value for dataiter - - name prameter name - val parameter value - """ - check_call(_LIB.MXIOSetParam(self._datahandle, ctypes.c_char_p(name), ctypes.c_char_p(val))) - - def init(self): - """init dataiter - + Parameters + --------- + args: + provide positional arguments, should not be given. 
+ + kwargs: + provide keyword arguments + Returns + ------- + the inited iterator """ - check_call(_LIB.MXIOInit(self._datahandle)) + if len(args) != 0: + raise TypeError('data iterator only accept \ + keyword arguments') + num_args = len(kwargs) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) + vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) + check_call(_LIB.MXDataIterSetInit( \ + self.handle, num_args, keys, vals)) def beforefirst(self): """set loc to 0 """ - check_call(_LIB.MXIOBeforeFirst(self._datahandle)) + check_call(_LIB.MXDataIterBeforeFirst(self.handle)) def next(self): """init dataiter """ next_res = ctypes.c_int(0) - check_call(_LIB.MXIONext(self._datahandle, ctypes.byref(next_res))) + check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) return next_res.value def getdata(self): @@ -91,7 +69,7 @@ def getdata(self): """ hdl = NArrayHandle() - check_call(_LIB.MXIOGetData(self._datahandle, ctypes.byref(hdl))) + check_call(_LIB.MXDataIterGetData(self.handle, ctypes.byref(hdl))) return NArray(hdl) def getlabel(self): @@ -99,5 +77,97 @@ def getlabel(self): """ hdl = NArrayHandle() - check_call(_LIB.MXIOGetLabel(self._datahandle, ctypes.byref(hdl))) + check_call(_LIB.MXDataIterGetLabel(self.handle, ctypes.byref(hdl))) return NArray(hdl) + +def _make_io_iterator(handle): + """Create an io iterator by handle.""" + name = ctypes.c_char_p() + desc = ctypes.c_char_p() + num_args = mx_uint() + arg_names = ctypes.POINTER(ctypes.c_char_p)() + arg_types = ctypes.POINTER(ctypes.c_char_p)() + arg_descs = ctypes.POINTER(ctypes.c_char_p)() + + check_call(_LIB.MXDataIterGetIterInfo( \ + handle, ctypes.byref(name), ctypes.byref(desc), \ + ctypes.byref(num_args), \ + ctypes.byref(arg_names), \ + ctypes.byref(arg_types), \ + ctypes.byref(arg_descs))) + iter_name = name.value + param_str = [] + for i in range(num_args.value): + ret = '%s : %s' % (arg_names[i], arg_types[i]) + if len(arg_descs[i]) != 0: + ret += 
'\n ' + arg_descs[i] + param_str.append(ret) + + doc_str = ('%s\n\n' + + 'Parameters\n' + + '----------\n' + + '%s\n' + + 'name : string, required.\n' + + ' Name of the resulting data iterator.\n\n' + + 'Returns\n' + + '-------\n' + + 'iterator: Iterator\n'+ + ' The result iterator.') + doc_str = doc_str % (desc.value, '\n'.join(param_str)) + + def creator(*args, **kwargs): + """Create an iterator. + The parameters listed below can be passed in as keyword arguments. + + Parameters + ---------- + name : string, required. + Name of the resulting data iterator. + + Returns + ------- + symbol: Symbol + the resulting symbol + """ + param_keys = [] + param_vals = [] + symbol_kwargs = {} + name = kwargs.pop('name', None) + + for k, v in kwargs.items(): + param_keys.append(c_str(k)) + param_vals.append(c_str(str(v))) + # create atomic symbol + param_keys = c_array(ctypes.c_char_p, param_keys) + param_vals = c_array(ctypes.c_char_p, param_vals) + iter_handle = DataIterHandle() + check_call(_LIB.MXDataIterCreateIter( + handle, len(param_keys), + param_keys, param_vals, + ctypes.byref(iter_handle))) + + if len(args): + raise TypeError('%s can only accept keyword arguments' % iter_name) + + return DataIter(iter_handle) + + creator.__name__ = iter_name + creator.__doc__ = doc_str + return creator + + +def _init_io_module(): + """List and add all the data iterators to current module.""" + plist = ctypes.POINTER(ctypes.c_void_p)() + size = ctypes.c_uint() + + check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) + + module_obj = sys.modules[__name__] + for i in range(size.value): + hdl = ctypes.c_void_p(plist[i]) + dataiter = _make_io_iterator(hdl) + setattr(module_obj, dataiter.__name__, dataiter) + +# Initialize the io in startups +_init_io_module() diff --git a/python/test_io.py b/python/test_io.py index 6909176d11c2..d15d4cc32fcd 100644 --- a/python/test_io.py +++ b/python/test_io.py @@ -1,22 +1,21 @@ #pylint: skip-file import mxnet as mx +import numpy as 
np +import os -dataiter = mx.io.DataIter() -#a.createfromcfg('/home/tianjun/mxnet/mxnet/MNIST.conf') -dataiter.createbyname('mnist') -dataiter.setparam('path_img', "/home/tianjun/data/mnist/train-images-idx3-ubyte") -dataiter.setparam('path_label', "/home/tianjun/data/mnist/train-labels-idx1-ubyte") -dataiter.setparam('shuffle', '1') -dataiter.setparam('seed_data', '2') -dataiter.setparam('batch_size', '100') - -dataiter.init() +dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") dataiter.beforefirst() -for i in range(100): - dataiter.next() - info = "Batch %d" % (i) +idx = 0 +while dataiter.next(): + info = "Batch %d" % (idx) + idx += 1 print info - label = dataiter.getdata() + ''' + label = dataiter.getlabel() print label.numpy + ''' + diff --git a/python/test_mnist.py b/python/test_mnist.py new file mode 100644 index 000000000000..fa0f29a60033 --- /dev/null +++ b/python/test_mnist.py @@ -0,0 +1,113 @@ +# pylint: skip-file +import mxnet as mx +import numpy as np +import os, cPickle, gzip + +def Softmax(x): + batch, nidden = x.shape + maxes = np.max(x, axis=1) + x -= maxes.reshape(batch, 1) + x = np.exp(x) + norm = np.sum(x, axis=1) + prob = x / norm.reshape((batch, 1)) + return prob + +def CalAcc(out, label): + pred = np.argmax(out, axis=1) + return np.sum(pred == label.transpose()) * 1.0 / out.shape[0] + +def SetGradient(out_grad, label): + assert(out_grad.shape[0] == label.shape[0]) + for i in xrange(label.shape[0]): + k = label[i] + out_grad[i][k] -= 1.0 + +# symbol net +batch_size = 100 +data = mx.symbol.Variable('data') +fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=160) +act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu") +fc2 = mx.symbol.FullyConnected(data = act1, name='fc2', num_hidden=10) +args_list = fc2.list_arguments() +# infer shape +data_shape = 
(batch_size, 1, 1, 784) +arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) +arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] +grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] +mom_narrays = [mx.narray.create(shape) for shape in arg_shapes] +inputs = dict(zip(args_list, arg_narrays)) + +np.random.seed(0) +# set random weight +for name, narray in inputs.items(): + if "weight" in name: + narray.numpy[:, :] = np.random.uniform(-0.001, 0.001, narray.numpy.shape) + if "bias" in name: + narray.numpy[:] = 0.0 + +req = ['write_to' for i in range(len(arg_narrays))] +# bind executer +# TODO(bing): think of a better bind interface +executor = fc2.bind(mx.Context('cpu'), arg_narrays, grad_narrays, req) +# update + +out_narray = executor.heads()[0] +grad_narray = mx.narray.create(out_narray.shape) + +epoch = 10 +momentum = 0.9 +lr = 0.001 +wd = 0.0004 + +def Update(mom, grad, weight): + weight.numpy[:] -= lr * grad.numpy[:] + +block = zip(mom_narrays, grad_narrays, arg_narrays) + + +train_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") +train_dataiter.beforefirst() +val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", + path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=1, input_flat="flat") +val_dataiter.beforefirst() + +for i in xrange(epoch): + # train + print "Epoch %d" % i + train_acc = 0.0 + val_acc = 0.0 + train_nbatch = 0 + val_nbatch = 0 + while train_dataiter.next(): + data = train_dataiter.getdata() + label = train_dataiter.getlabel().numpy.astype(np.int32) + inputs["data"].numpy[:] = data.numpy + executor.forward() + out_narray.numpy[:] = Softmax(out_narray.numpy) + train_acc += CalAcc(out_narray.numpy, label) + train_nbatch += 1 + grad_narray.numpy[:] = out_narray.numpy + 
SetGradient(grad_narray.numpy, label) + executor.backward([grad_narray]) + + for mom, grad, weight in block: + Update(mom, grad, weight) + + # evaluate + while val_dataiter.next(): + data = val_dataiter.getdata() + label = val_dataiter.getlabel().numpy.astype(np.int32) + inputs["data"].numpy[:] = data.numpy + executor.forward() + val_acc += CalAcc(out_narray.numpy, label) + val_nbatch += 1 + print "Train Acc: ", train_acc / train_nbatch + print "Valid Acc: ", val_acc / val_nbatch + train_dataiter.beforefirst() + val_dataiter.beforefirst() + + + diff --git a/src/c_api.cc b/src/c_api.cc index 4155e1ca6f9a..d471d66c4d48 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -612,95 +612,86 @@ int MXExecutorBind(SymbolHandle symbol_handle, API_END(); } -int MXIOCreateFromConfig(const char *cfg, DataIterHandle *out) { +//-------------------------------------------- +// Part 5: IO Interface +//-------------------------------------------- +int MXListDataIters(mx_uint *out_size, + DataIterCreator **out_array) { API_BEGIN(); - *out = static_cast(CreateIteratorFromConfig(cfg)); + auto &vec = dmlc::Registry::List(); + *out_size = static_cast(vec.size()); + *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) API_END(); } -int MXIOCreateByName(const char *iter_name, DataIterHandle *out) { +int MXDataIterGetName(DataIterCreator iter, + const char **out_name) { API_BEGIN(); - *out = static_cast(CreateIteratorByName(iter_name)); + auto *f = static_cast(iter); + *out_name = f->name.c_str(); API_END(); } -int MXIOSetParam(DataIterHandle handle, const char *name, const char *val) { +int MXDataIterGetIterInfo(DataIterCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions) { + DataIteratorReg *e = static_cast(creator); + return MXAPIGetFunctionRegInfo(e, name, description, num_args, + arg_names, arg_type_infos, arg_descriptions); +} + +int 
MXDataIterCreateIter(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out) { + IIterator *iter = nullptr; API_BEGIN(); - static_cast* >(handle)->SetParam(name, val); - API_END(); + DataIteratorReg *e = static_cast(creator); + iter = e->body(); + std::vector > kwargs; + for (int i = 0; i < num_param; ++i) { + kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); + } + iter->SetInit(kwargs); + *out = iter; + API_END_HANDLE_ERROR(delete iter); } -int MXIOInit(DataIterHandle handle) { +int MXDataIterFree(DataIterHandle handle) { API_BEGIN(); - static_cast* >(handle)->Init(); + delete static_cast *>(handle); API_END(); } -int MXIOBeforeFirst(DataIterHandle handle) { +int MXDataIterBeforeFirst(DataIterHandle handle) { API_BEGIN(); static_cast* >(handle)->BeforeFirst(); API_END(); } -int MXIONext(DataIterHandle handle, int *out) { +int MXDataIterNext(DataIterHandle handle, int *out) { API_BEGIN(); *out = static_cast* >(handle)->Next(); API_END(); } -int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out) { +int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out) { API_BEGIN(); DataBatch db = static_cast* >(handle)->Value(); *out = new NArray(db.data[1], 0); API_END(); } -int MXIOGetData(DataIterHandle handle, NArrayHandle *out) { +int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out) { API_BEGIN(); DataBatch db = static_cast* >(handle)->Value(); *out = new NArray(db.data[0], 0); API_END(); } -int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array) { - API_BEGIN(); - auto &vec = dmlc::Registry::List(); - *out_size = static_cast(vec.size()); - *out_array = (DataIterCreator*)(dmlc::BeginPtr(vec)); // NOLINT(*) - API_END(); -} - -int MXIOIterGetName(DataIterCreator iter, - const char **out_name) { - API_BEGIN(); - auto *f = static_cast(iter); - *out_name = f->name.c_str(); - API_END(); -} - -int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - 
const char **vals, - DataIterHandle *out) { - DataIteratorReg *e = static_cast(creator); - IIterator *iter = e->body(); - API_BEGIN(); - std::vector > kwargs; - for (int i = 0; i < num_param; ++i) { - kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); - } - iter->InitParams(kwargs); - *out = iter; - API_END_HANDLE_ERROR(delete iter); -} - -int MXDataIterFree(DataIterHandle iter) { - API_BEGIN(); - delete static_cast *>(symbol); - API_END(); -} - - diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 6a705c483e5b..88a3e4d82acd 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -106,9 +106,10 @@ class MNISTIterator: public IIterator { virtual const DataBatch &Value(void) const { return out_; } - virtual void InitParams(const std::vector >& kwargs) { + virtual void SetInit(const std::vector >& kwargs) { std::map kmap(kwargs.begin(), kwargs.end()); param.Init(kmap); + this->Init(); } private: inline void LoadImage(void) { diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index c6fab8d376d7..d6119d6c8a69 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -10,7 +10,7 @@ namespace mxnet { namespace io { DMLC_REGISTER_PARAMETER(MNISTParam); -MXNET_REGISTER_IO_ITER(MNIST, MNISTIterator) +MXNET_REGISTER_IO_ITER(MNISTIterator, MNISTIterator) .describe("Create MNISTIterator") .add_arguments(MNISTParam::__FIELDS__()); From 36cff22c8ea96342e2d9fd8c9516447db5af6b93 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 04:07:14 +0800 Subject: [PATCH 35/61] clean io interface --- include/mxnet/io.h | 25 --------------------- python/mxnet/io.py | 14 +++++------- python/test_mnist.py | 4 +--- src/common/utils.h | 6 ++++++ src/io/io.cc | 48 ----------------------------------------- src/io/iter_mnist-inl.h | 25 +++++++++++---------- 6 files changed, 24 insertions(+), 98 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 4ca5ed05fd18..ac22919745a1 100644 --- a/include/mxnet/io.h +++ 
b/include/mxnet/io.h @@ -20,12 +20,6 @@ namespace mxnet { template class IIterator : public dmlc::DataIter { public: - /*! - * \brief set the parameter - * \param name name of parameter - * \param val value of parameter - */ - virtual void SetParam(const char *name, const char *val) = 0; /*! * \brief set the parameters and init iter * \param kwargs key-value pairs @@ -89,25 +83,6 @@ struct DataBatch { void Naming(std::vector names); }; // struct DataBatch -/*! - * \brief create the databatch iterator IIterator - * \param cfg configure settings key=vale pair - * \return the data IIterator ptr - */ -IIterator *CreateIterator(const std::vector > &cfg); -/*! - * \brief create the databatch iterator IIterator from config file - * \param cfg_path configure file path - * \return the data IIterator ptr - */ -IIterator *CreateIteratorFromConfig(const char* cfg_path); -/*! - * \brief create the databatch iterator IIterator by iter name - * \param iter_name can be mnist, imgrec and so on - * \return the data IIterator ptr - */ -IIterator *CreateIteratorByName(const char* iter_name); - /*! \brief typedef the factory function of data iterator */ typedef IIterator *(*DataIteratorFactory)(); /*! diff --git a/python/mxnet/io.py b/python/mxnet/io.py index baee99a02d61..dba36bd2114c 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -6,7 +6,7 @@ import ctypes import sys from .base import _LIB -from .base import c_array, c_str, mx_uint, string_types +from .base import c_array, c_str, mx_uint from .base import DataIterHandle, NArrayHandle from .base import check_call from .narray import NArray @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. 
@@ -43,9 +43,9 @@ def __call__(self, *args, **kwargs): """ if len(args) != 0: raise TypeError('data iterator only accept \ - keyword arguments') + keyword arguments') num_args = len(kwargs) - keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) check_call(_LIB.MXDataIterSetInit( \ self.handle, num_args, keys, vals)) @@ -131,8 +131,6 @@ def creator(*args, **kwargs): """ param_keys = [] param_vals = [] - symbol_kwargs = {} - name = kwargs.pop('name', None) for k, v in kwargs.items(): param_keys.append(c_str(k)) @@ -160,9 +158,7 @@ def _init_io_module(): """List and add all the data iterators to current module.""" plist = ctypes.POINTER(ctypes.c_void_p)() size = ctypes.c_uint() - - check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) - + check_call(_LIB.MXListDataIters(ctypes.byref(size), ctypes.byref(plist))) module_obj = sys.modules[__name__] for i in range(size.value): hdl = ctypes.c_void_p(plist[i]) diff --git a/python/test_mnist.py b/python/test_mnist.py index fa0f29a60033..8c3e09ba3705 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -67,7 +67,7 @@ def Update(mom, grad, weight): train_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat") + batch_size=100, shuffle=1, silent=1, input_flat="flat", seed_data=1) train_dataiter.beforefirst() val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", @@ -109,5 +109,3 @@ def Update(mom, grad, weight): train_dataiter.beforefirst() val_dataiter.beforefirst() - - diff --git a/src/common/utils.h b/src/common/utils.h index f55ebc26535f..cf1fd2f1bb36 100644 --- 
a/src/common/utils.h +++ b/src/common/utils.h @@ -10,12 +10,18 @@ #include #include #include +#include #endif // DMLC_USE_CXX11 namespace common { #if DMLC_USE_CXX11 +/*! + * \brief Random Engine + */ +typedef std::mt19937 RANDOM_ENGINE; + /*! * \brief Helper functions. */ diff --git a/src/io/io.cc b/src/io/io.cc index aafe85073a52..fb7a8c2d3092 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -11,55 +11,7 @@ #include #include #include "iter_mnist-inl.h" -#include "../utils/random.h" namespace dmlc { DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); } // namespace dmlc - -namespace mxnet { - IIterator *CreateIterator( - const std::vector< std::pair > &cfg) { - size_t i = 0; - IIterator *it = NULL; - for (; i < cfg.size(); ++i) { - const char *name = cfg[i].first.c_str(); - const char *val = cfg[i].second.c_str(); - if (!strcmp(name, "iter")) { - if (!strcmp(val, "mnist")) { - CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new io::MNISTIterator(); continue; - } - CHECK(!strcmp(val, "mnist")) << "Currently only have mnist iterator"; - } - if (it != NULL) { - it->SetParam(name, val); - } - } - CHECK(it != NULL) << "must specify iterator by iter=itername"; - return it; - } - - IIterator *CreateIteratorFromConfig(const char* cfg_path) { - std::ifstream ifs(cfg_path, std::ifstream::in); - std::vector< std::pair< std::string, std::string> > itcfg; - dmlc::Config cfg(ifs); - for (dmlc::Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { - dmlc::Config::ConfigEntry ent = *iter; - itcfg.push_back(std::make_pair(ent.first, ent.second)); - } - // Get the data and init - return CreateIterator(itcfg); - } - - IIterator *CreateIteratorByName(const char* iter_name) { - IIterator *it = NULL; - // Currently only support mnist - if (!strcmp(iter_name, "mnist")) { - CHECK(it == NULL) << "mnist cannot chain over other iterator"; - it = new io::MNISTIterator(); - } - CHECK(!strcmp(iter_name, "mnist")) << "Currently only have mnist iterator"; - 
return it; - } -} // namespace mxnet diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 88a3e4d82acd..ef2348488396 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -13,7 +13,9 @@ #include #include #include -#include "../utils/random.h" +#include +#include +#include "../common/utils.h" namespace mxnet { namespace io { @@ -29,6 +31,8 @@ struct MNISTParam : public dmlc::Parameter { int batch_size; /*! \brief data mode */ int input_flat; + /*! \brief random seed */ + int seed_data; // declare parameters in header file DMLC_DECLARE_PARAMETER(MNISTParam) { DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") @@ -36,33 +40,29 @@ struct MNISTParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") .describe("Mnist label path."); DMLC_DECLARE_FIELD(shuffle).set_default(false) - .describe("Whether to shuffle data."); + .describe("Whether to shuffle data."); DMLC_DECLARE_FIELD(silent).set_default(false) - .describe("Whether to print out data info."); + .describe("Whether to print out data info."); DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) .describe("Batch Size."); DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) .add_enum("noflat", 0).set_default(1) .describe("Whether to flat the data into 1D."); + DMLC_DECLARE_FIELD(seed_data).set_default(0) + .describe("Random Seed."); } }; - + class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; inst_offset_ = 0; - rnd.Seed(kRandMagic); out_.data.resize(2); } virtual ~MNISTIterator(void) { if (img_.dptr_ != NULL) delete []img_.dptr_; } - virtual void SetParam(const char *name, const char *val) { - std::map kwargs; - kwargs[name] = val; - param.Init(kwargs); - } // intialize iterator loads data in virtual void Init(void) { this->LoadImage(); @@ -111,6 +111,7 @@ class MNISTIterator: public IIterator { param.Init(kmap); this->Init(); } + private: inline void LoadImage(void) { 
dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); @@ -151,7 +152,7 @@ class MNISTIterator: public IIterator { delete stdlabel; } inline void Shuffle(void) { - rnd.Shuffle(&inst_); + std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param.seed_data)); std::vector tmplabel(labels_.size()); mshadow::TensorContainer tmpimg(img_.shape_); for (size_t i = 0; i < inst_.size(); ++i) { @@ -191,8 +192,6 @@ class MNISTIterator: public IIterator { unsigned inst_offset_; /*! \brief instance index */ std::vector inst_; - // random sampler - utils::RandomSampler rnd; // magic number to setup randomness static const int kRandMagic = 0; }; // class MNISTIterator From 3ee7f24e6b9b46f1ac636286dc82837be1f0b997 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:40:01 +0800 Subject: [PATCH 36/61] finish merge remote master --- Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Makefile b/Makefile index 4318d384cff1..8d2ce8fb9b9c 100644 --- a/Makefile +++ b/Makefile @@ -101,18 +101,12 @@ pooling_cpu.o: src/operator/pooling.cc pooling_gpu.o: src/operator/pooling.cu softmax_cpu.o: src/operator/softmax.cc softmax_gpu.o: src/operator/softmax.cu -<<<<<<< HEAD convolution_cpu.o: src/operator/convolution.cc convolution_gpu.o: src/operator/convolution.cu reshape_cpu.o: src/operator/reshape.cc reshape_gpu.o: src/operator/reshape.cu -======= io.o: src/io/io.cc -<<<<<<< HEAD ->>>>>>> finish merge remote master -======= iter_mnist.o: src/io/iter_mnist.cc ->>>>>>> finish old version registry in C lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) From 88d2db78b9375e33559361a0d912b06388221289 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sat, 22 Aug 2015 13:05:57 +0800 Subject: [PATCH 37/61] built in python, start polishing new feature required --- include/mxnet/c_api.h | 23 +++++++++++++++++++++++ python/test_io.py | 1 - src/c_api.cc | 1 - 3 files changed, 23 
insertions(+), 2 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 631e0c032852..0ff3adf54d08 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -508,6 +508,29 @@ MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXDataIterFree(DataIterHandle handle); +/*! + * \brief create an data iterator by name + * \param iter_name iterator name + * \param out the handle to the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOCreateByName(const char *iter_name, + DataIterHandle *out); +/*! + * \brief set parameter value + * \param handle the handle to iterator + * \param name parameter name + * \param val parameter value + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOSetParam(DataIterHandle handle, + const char *name, const char *val); +/*! + * \brief Init after set parameter + * \param handle the handle to iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOInit(DataIterHandle handle); /*! 
* \brief move iterator to next position * \param handle the handle to iterator diff --git a/python/test_io.py b/python/test_io.py index d15d4cc32fcd..940a4baa5c0d 100644 --- a/python/test_io.py +++ b/python/test_io.py @@ -18,4 +18,3 @@ label = dataiter.getlabel() print label.numpy ''' - diff --git a/src/c_api.cc b/src/c_api.cc index d471d66c4d48..2351eacf8366 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include From 2ff4ff0d1908f3458f3e78ff9f57971564854980 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:28:47 +0800 Subject: [PATCH 38/61] finish old version registry in C --- include/mxnet/c_api.h | 32 ++++++++++++++++++++++++++++++++ src/c_api.cc | 1 + src/io/iter_mnist.cc | 2 +- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 0ff3adf54d08..5bf0aeba584e 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -562,5 +562,37 @@ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, */ MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); +/*! + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array); +/*! + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOIterGetName(DataIterCreator iter, + const char **out_name); +/*! 
+ * \brief create an iterator, init with parameters + * the array size of passed in arguments + * \param creator IOIterator Enrty + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); + #endif // MXNET_C_API_H_ diff --git a/src/c_api.cc b/src/c_api.cc index 2351eacf8366..d471d66c4d48 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index d6119d6c8a69..f3906ca410e7 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,6 +4,7 @@ * \brief register mnist iterator * \author Tianjun Xiao */ +#include #include "./iter_mnist-inl.h" namespace mxnet { @@ -13,6 +14,5 @@ DMLC_REGISTER_PARAMETER(MNISTParam); MXNET_REGISTER_IO_ITER(MNISTIterator, MNISTIterator) .describe("Create MNISTIterator") .add_arguments(MNISTParam::__FIELDS__()); - } // namespace io } // namespace mxnet From 9b002de533a5d54052c70e5047f3f3fb2cfb6285 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 23:21:49 +0800 Subject: [PATCH 39/61] modify to dmlc registry --- src/io/iter_mnist.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index f3906ca410e7..3ddda17a10af 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,7 +4,6 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include #include "./iter_mnist-inl.h" namespace mxnet { From c29793f92fe53a7bdb0317aca6d52b8ba65e59e8 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 03:20:11 +0800 Subject: [PATCH 40/61] pass python mnist test, begin cleaning --- include/mxnet/c_api.h | 84 ++++++++++++++++++++----------------------- 
python/mxnet/io.py | 3 ++ src/c_api.cc | 2 -- 3 files changed, 41 insertions(+), 48 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 5bf0aeba584e..9347b62db4cb 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -509,28 +509,52 @@ MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, */ MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! - * \brief create an data iterator by name - * \param iter_name iterator name - * \param out the handle to the iterator + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateByName(const char *iter_name, - DataIterHandle *out); +MXNET_DLL int MXDataIterGetName(DataIterCreator iter, + const char **out_name); /*! - * \brief set parameter value - * \param handle the handle to iterator - * \param name parameter name - * \param val parameter value + * \brief init an iterator, init with parameters + * the array size of passed in arguments + * \param handle of the iterator creator + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out resulting iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOSetParam(DataIterHandle handle, - const char *name, const char *val); +MXNET_DLL int MXDataIterCreateIter(DataIterCreator handle, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); /*! - * \brief Init after set parameter - * \param handle the handle to iterator + * \brief Get the detailed information about data iterator. + * \param creator the DataIterCreator. + * \param name The returned name of the creator. + * \param description The returned description of the symbol. + * \param num_args Number of arguments. + * \param arg_names Name of the arguments. + * \param arg_type_infos Type informations about the arguments. 
+ * \param arg_descriptions Description information about the arguments. + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions); +/*! + * \brief free the handle to the IO module + * \param handle the handle pointer to the data iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOInit(DataIterHandle handle); +MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! * \brief move iterator to next position * \param handle the handle to iterator @@ -562,37 +586,5 @@ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, */ MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); -/*! - * \brief list all the available iterator entries - * \param out_size the size of returned iterators - * \param out_array the output iteratos entries - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array); -/*! - * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOIterGetName(DataIterCreator iter, - const char **out_name); -/*! 
- * \brief create an iterator, init with parameters - * the array size of passed in arguments - * \param creator IOIterator Enrty - * \param num_param number of parameter - * \param keys parameter keys - * \param vals parameter values - * \param out pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - const char **vals, - DataIterHandle *out); - #endif // MXNET_C_API_H_ diff --git a/python/mxnet/io.py b/python/mxnet/io.py index dba36bd2114c..31b1e31a7fd5 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -26,6 +26,9 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) + + def __call__(self, *args, **kwargs): + """Invoke iterator as function on inputs. Init params. def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. diff --git a/src/c_api.cc b/src/c_api.cc index d471d66c4d48..5965b6a4fab2 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -693,5 +693,3 @@ int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out) { *out = new NArray(db.data[0], 0); API_END(); } - - From 56b4129fb265edde3e6a84104debb742248b5899 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 04:07:14 +0800 Subject: [PATCH 41/61] clean io interface --- python/mxnet/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 31b1e31a7fd5..e538ea9df37f 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. 
From e48e4002003d1d22cdafaab87c4d62e838293d43 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 04:17:01 +0800 Subject: [PATCH 42/61] modify to pass travis --- python/mxnet/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/mxnet/io.py b/python/mxnet/io.py index e538ea9df37f..2c9618b8089e 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -135,9 +135,9 @@ def creator(*args, **kwargs): param_keys = [] param_vals = [] - for k, v in kwargs.items(): + for k, val in kwargs.items(): param_keys.append(c_str(k)) - param_vals.append(c_str(str(v))) + param_vals.append(c_str(str(val))) # create atomic symbol param_keys = c_array(ctypes.c_char_p, param_keys) param_vals = c_array(ctypes.c_char_p, param_vals) From 49728eba50df55ef5adf7e207abf23ebe31ef01c Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:40:01 +0800 Subject: [PATCH 43/61] finish merge remote master --- Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile b/Makefile index 8d2ce8fb9b9c..bdcbe56a1d24 100644 --- a/Makefile +++ b/Makefile @@ -112,8 +112,6 @@ lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) $(LIB_DEP) test/test_storage: test/test_storage.cc lib/libmxnet.a -#test/io_mnist_test: test/io_mnist_test.cc lib/libmxnet.a $(DMLC_CORE)/libdmlc.a -#test/test_threaded_engine: test/test_threaded_engine.cc api/libmxnet.a $(BIN) : $(CXX) $(CFLAGS) -std=c++0x -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) From cb99a1ed868af0bf6df30700da419ade8801ede3 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sat, 22 Aug 2015 13:05:57 +0800 Subject: [PATCH 44/61] built in python, start polishing new feature required --- include/mxnet/c_api.h | 23 +++++++++++++++++++++++ python/mxnet/io.py | 3 --- src/c_api.cc | 1 - 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 9347b62db4cb..9006402091f3 100644 --- 
a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -555,6 +555,29 @@ MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXDataIterFree(DataIterHandle handle); +/*! + * \brief create an data iterator by name + * \param iter_name iterator name + * \param out the handle to the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOCreateByName(const char *iter_name, + DataIterHandle *out); +/*! + * \brief set parameter value + * \param handle the handle to iterator + * \param name parameter name + * \param val parameter value + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOSetParam(DataIterHandle handle, + const char *name, const char *val); +/*! + * \brief Init after set parameter + * \param handle the handle to iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOInit(DataIterHandle handle); /*! * \brief move iterator to next position * \param handle the handle to iterator diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 2c9618b8089e..906843e5d0c5 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -27,9 +27,6 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - def __call__(self, *args, **kwargs): - """Invoke iterator as function on inputs. Init params. - def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. 
diff --git a/src/c_api.cc b/src/c_api.cc index 5965b6a4fab2..58ccc1adceb8 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include From 24ad6f58f2870c717e380cf78fc48d0ee6641daf Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:28:47 +0800 Subject: [PATCH 45/61] finish old version registry in C --- include/mxnet/c_api.h | 32 ++++++++++++++++++++++++++++++++ src/c_api.cc | 1 + src/io/iter_mnist.cc | 1 + 3 files changed, 34 insertions(+) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 9006402091f3..d1790f49decc 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -609,5 +609,37 @@ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, */ MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); +/*! + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array); +/*! + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOIterGetName(DataIterCreator iter, + const char **out_name); +/*! 
+ * \brief create an iterator, init with parameters + * the array size of passed in arguments + * \param creator IOIterator Enrty + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); + #endif // MXNET_C_API_H_ diff --git a/src/c_api.cc b/src/c_api.cc index 58ccc1adceb8..5965b6a4fab2 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 3ddda17a10af..f3906ca410e7 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,6 +4,7 @@ * \brief register mnist iterator * \author Tianjun Xiao */ +#include #include "./iter_mnist-inl.h" namespace mxnet { From 393134316581724f906e824410e130377ee4fb2b Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 23:21:49 +0800 Subject: [PATCH 46/61] modify to dmlc registry --- src/io/iter_mnist.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index f3906ca410e7..3ddda17a10af 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,7 +4,6 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include #include "./iter_mnist-inl.h" namespace mxnet { From b1e2bd987be3dd0525a6f01a3e0d467615556ebf Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 03:20:11 +0800 Subject: [PATCH 47/61] pass python mnist test, begin cleaning --- include/mxnet/c_api.h | 84 ++++++++++++++++++++----------------------- include/mxnet/io.h | 7 ++++ python/mxnet/io.py | 3 ++ python/test_mnist.py | 1 - 4 files changed, 48 insertions(+), 47 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index d1790f49decc..37a192498ea6 100644 
--- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -556,28 +556,52 @@ MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, */ MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! - * \brief create an data iterator by name - * \param iter_name iterator name - * \param out the handle to the iterator + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOCreateByName(const char *iter_name, - DataIterHandle *out); +MXNET_DLL int MXDataIterGetName(DataIterCreator iter, + const char **out_name); /*! - * \brief set parameter value - * \param handle the handle to iterator - * \param name parameter name - * \param val parameter value + * \brief init an iterator, init with parameters + * the array size of passed in arguments + * \param handle of the iterator creator + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out resulting iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOSetParam(DataIterHandle handle, - const char *name, const char *val); +MXNET_DLL int MXDataIterCreateIter(DataIterCreator handle, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); /*! - * \brief Init after set parameter - * \param handle the handle to iterator + * \brief Get the detailed information about data iterator. + * \param creator the DataIterCreator. + * \param name The returned name of the creator. + * \param description The returned description of the symbol. + * \param num_args Number of arguments. + * \param arg_names Name of the arguments. + * \param arg_type_infos Type informations about the arguments. + * \param arg_descriptions Description information about the arguments. 
+ * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, + const char **name, + const char **description, + mx_uint *num_args, + const char ***arg_names, + const char ***arg_type_infos, + const char ***arg_descriptions); +/*! + * \brief free the handle to the IO module + * \param handle the handle pointer to the data iterator * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXIOInit(DataIterHandle handle); +MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! * \brief move iterator to next position * \param handle the handle to iterator @@ -609,37 +633,5 @@ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, */ MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); -/*! - * \brief list all the available iterator entries - * \param out_size the size of returned iterators - * \param out_array the output iteratos entries - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array); -/*! - * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOIterGetName(DataIterCreator iter, - const char **out_name); -/*! 
- * \brief create an iterator, init with parameters - * the array size of passed in arguments - * \param creator IOIterator Enrty - * \param num_param number of parameter - * \param keys parameter keys - * \param vals parameter values - * \param out pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - const char **vals, - DataIterHandle *out); - #endif // MXNET_C_API_H_ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index ac22919745a1..4c53c51c3a02 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -21,6 +21,13 @@ template class IIterator : public dmlc::DataIter { public: /*! + * \brief set the parameter + * \param name name of parameter + * \param val value of parameter + */ + virtual void SetParam(const char *name, const char *val) = 0; + /*! +>>>>>>> pass python mnist test, begin cleaning * \brief set the parameters and init iter * \param kwargs key-value pairs */ diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 906843e5d0c5..d43bb3cd9f47 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -26,6 +26,9 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) + + def __call__(self, *args, **kwargs): + """Invoke iterator as function on inputs. Init params. def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. 
diff --git a/python/test_mnist.py b/python/test_mnist.py index 8c3e09ba3705..64311a438005 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -108,4 +108,3 @@ def Update(mom, grad, weight): print "Valid Acc: ", val_acc / val_nbatch train_dataiter.beforefirst() val_dataiter.beforefirst() - From 0e99411a8fcdc27eeade9a245ecb8dc857b5743f Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 04:07:14 +0800 Subject: [PATCH 48/61] clean io interface --- include/mxnet/io.h | 7 ------- python/mxnet/io.py | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/include/mxnet/io.h b/include/mxnet/io.h index 4c53c51c3a02..ac22919745a1 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -21,13 +21,6 @@ template class IIterator : public dmlc::DataIter { public: /*! - * \brief set the parameter - * \param name name of parameter - * \param val value of parameter - */ - virtual void SetParam(const char *name, const char *val) = 0; - /*! ->>>>>>> pass python mnist test, begin cleaning * \brief set the parameters and init iter * \param kwargs key-value pairs */ diff --git a/python/mxnet/io.py b/python/mxnet/io.py index d43bb3cd9f47..2c9618b8089e 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. 
From 51b7208ba7d1399531b126419b97cd9546f5b9ce Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:40:01 +0800 Subject: [PATCH 49/61] finish merge remote master --- src/io/iter_mnist-inl.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index ef2348488396..ca88b4762c74 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -57,12 +57,27 @@ class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; + mode_ = 1; inst_offset_ = 0; + silent_ = 0; + shuffle_ = 0; + rnd.Seed(kRandMagic); out_.data.resize(2); } virtual ~MNISTIterator(void) { if (img_.dptr_ != NULL) delete []img_.dptr_; } + virtual void SetParam(const char *name, const char *val) { + if (!strcmp(name, "silent")) silent_ = atoi(val); + if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); + if (!strcmp(name, "input_flat")) mode_ = atoi(val); + if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); + if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "path_label")) path_label = val; + if (!strcmp(name, "path_img")) path_img = val; + if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + } // intialize iterator loads data in virtual void Init(void) { this->LoadImage(); From 358a6239e2e85dd26e69487474d807c97e610f1b Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 12:28:47 +0800 Subject: [PATCH 50/61] finish old version registry in C --- include/mxnet/c_api.h | 32 ++++++++++++++++++++++++++++++++ src/io/iter_mnist-inl.h | 15 +++------------ src/io/iter_mnist.cc | 1 + 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 37a192498ea6..b5c2d43dd195 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -633,5 +633,37 @@ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, */ MXNET_DLL int 
MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); +/*! + * \brief list all the available iterator entries + * \param out_size the size of returned iterators + * \param out_array the output iteratos entries + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXListIOIters(mx_uint *out_size, + DataIterCreator **out_array); +/*! + * \brief get the name of iterator entry + * \param iter iterator entry + * \param out_name the name of the iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIOIterGetName(DataIterCreator iter, + const char **out_name); +/*! + * \brief create an iterator, init with parameters + * the array size of passed in arguments + * \param creator IOIterator Enrty + * \param num_param number of parameter + * \param keys parameter keys + * \param vals parameter values + * \param out pointer to the data iterator + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, + int num_param, + const char **keys, + const char **vals, + DataIterHandle *out); + #endif // MXNET_C_API_H_ diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index ca88b4762c74..9bc40b06d270 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -57,10 +57,7 @@ class MNISTIterator: public IIterator { public: MNISTIterator(void) { img_.dptr_ = NULL; - mode_ = 1; inst_offset_ = 0; - silent_ = 0; - shuffle_ = 0; rnd.Seed(kRandMagic); out_.data.resize(2); } @@ -68,15 +65,9 @@ class MNISTIterator: public IIterator { if (img_.dptr_ != NULL) delete []img_.dptr_; } virtual void SetParam(const char *name, const char *val) { - if (!strcmp(name, "silent")) silent_ = atoi(val); - if (!strcmp(name, "batch_size")) batch_size_ = (index_t)atoi(val); - if (!strcmp(name, "input_flat")) mode_ = atoi(val); - if (!strcmp(name, "shuffle")) shuffle_ = atoi(val); - if (!strcmp(name, "index_offset")) inst_offset_ = atoi(val); - if (!strcmp(name, "path_img")) 
path_img = val; - if (!strcmp(name, "path_label")) path_label = val; - if (!strcmp(name, "path_img")) path_img = val; - if (!strcmp(name, "seed_data")) rnd.Seed(kRandMagic + atoi(val)); + std::map kwargs; + kwargs[name] = val; + param.Init(kwargs); } // intialize iterator loads data in virtual void Init(void) { diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 3ddda17a10af..f3906ca410e7 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,6 +4,7 @@ * \brief register mnist iterator * \author Tianjun Xiao */ +#include #include "./iter_mnist-inl.h" namespace mxnet { From 94c6872ad31004b36a82e1b6003978ef4ba2ad91 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 23 Aug 2015 23:21:49 +0800 Subject: [PATCH 51/61] modify to dmlc registry --- src/io/iter_mnist.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index f3906ca410e7..3ddda17a10af 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,7 +4,6 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include #include "./iter_mnist-inl.h" namespace mxnet { From 8caa4acb7168e14d65f0b64d042ac6b05f27f40c Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 03:20:11 +0800 Subject: [PATCH 52/61] pass python mnist test, begin cleaning --- include/mxnet/c_api.h | 32 -------------------------------- python/mxnet/io.py | 19 ++++++++++++++++++- 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index b5c2d43dd195..37a192498ea6 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -633,37 +633,5 @@ MXNET_DLL int MXDataIterGetData(DataIterHandle handle, */ MXNET_DLL int MXDataIterGetLabel(DataIterHandle handle, NArrayHandle *out); -/*! 
- * \brief list all the available iterator entries - * \param out_size the size of returned iterators - * \param out_array the output iteratos entries - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXListIOIters(mx_uint *out_size, - DataIterCreator **out_array); -/*! - * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXIOIterGetName(DataIterCreator iter, - const char **out_name); -/*! - * \brief create an iterator, init with parameters - * the array size of passed in arguments - * \param creator IOIterator Enrty - * \param num_param number of parameter - * \param keys parameter keys - * \param vals parameter values - * \param out pointer to the data iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXCreateIOIterator(DataIterCreator creator, - int num_param, - const char **keys, - const char **vals, - DataIterHandle *out); - #endif // MXNET_C_API_H_ diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 2c9618b8089e..89b369ce5617 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. 
@@ -46,9 +46,15 @@ def __call__(self, *args, **kwargs): """ if len(args) != 0: raise TypeError('data iterator only accept \ +<<<<<<< HEAD keyword arguments') num_args = len(kwargs) keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) +======= + keyword arguments') + num_args = len(kwargs) + keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) +>>>>>>> pass python mnist test, begin cleaning vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) check_call(_LIB.MXDataIterSetInit( \ self.handle, num_args, keys, vals)) @@ -134,6 +140,11 @@ def creator(*args, **kwargs): """ param_keys = [] param_vals = [] +<<<<<<< HEAD +======= + symbol_kwargs = {} + name = kwargs.pop('name', None) +>>>>>>> pass python mnist test, begin cleaning for k, val in kwargs.items(): param_keys.append(c_str(k)) @@ -161,7 +172,13 @@ def _init_io_module(): """List and add all the data iterators to current module.""" plist = ctypes.POINTER(ctypes.c_void_p)() size = ctypes.c_uint() +<<<<<<< HEAD check_call(_LIB.MXListDataIters(ctypes.byref(size), ctypes.byref(plist))) +======= + + check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) + +>>>>>>> pass python mnist test, begin cleaning module_obj = sys.modules[__name__] for i in range(size.value): hdl = ctypes.c_void_p(plist[i]) From 3fc60b14e154b25375810afacf95b09218ad187d Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Mon, 24 Aug 2015 04:07:14 +0800 Subject: [PATCH 53/61] clean io interface --- python/mxnet/io.py | 19 +------------------ src/io/iter_mnist-inl.h | 6 ------ 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 89b369ce5617..e4d64efe0268 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -26,7 +26,7 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - + def __call__(self, *args, **kwargs): """Invoke iterator as function on inputs. Init params. 
@@ -46,15 +46,9 @@ def __call__(self, *args, **kwargs): """ if len(args) != 0: raise TypeError('data iterator only accept \ -<<<<<<< HEAD - keyword arguments') - num_args = len(kwargs) - keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) -======= keyword arguments') num_args = len(kwargs) keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) ->>>>>>> pass python mnist test, begin cleaning vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) check_call(_LIB.MXDataIterSetInit( \ self.handle, num_args, keys, vals)) @@ -140,11 +134,6 @@ def creator(*args, **kwargs): """ param_keys = [] param_vals = [] -<<<<<<< HEAD -======= - symbol_kwargs = {} - name = kwargs.pop('name', None) ->>>>>>> pass python mnist test, begin cleaning for k, val in kwargs.items(): param_keys.append(c_str(k)) @@ -172,13 +161,7 @@ def _init_io_module(): """List and add all the data iterators to current module.""" plist = ctypes.POINTER(ctypes.c_void_p)() size = ctypes.c_uint() -<<<<<<< HEAD check_call(_LIB.MXListDataIters(ctypes.byref(size), ctypes.byref(plist))) -======= - - check_call(_LIB.MXListDataIters(ctypes.byref(size),ctypes.byref(plist))) - ->>>>>>> pass python mnist test, begin cleaning module_obj = sys.modules[__name__] for i in range(size.value): hdl = ctypes.c_void_p(plist[i]) diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h index 9bc40b06d270..ef2348488396 100644 --- a/src/io/iter_mnist-inl.h +++ b/src/io/iter_mnist-inl.h @@ -58,17 +58,11 @@ class MNISTIterator: public IIterator { MNISTIterator(void) { img_.dptr_ = NULL; inst_offset_ = 0; - rnd.Seed(kRandMagic); out_.data.resize(2); } virtual ~MNISTIterator(void) { if (img_.dptr_ != NULL) delete []img_.dptr_; } - virtual void SetParam(const char *name, const char *val) { - std::map kwargs; - kwargs[name] = val; - param.Init(kwargs); - } // intialize iterator loads data in virtual void Init(void) { this->LoadImage(); From af128923d01c5a1d7c6e785986aa5458c2e98e5e 
Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Wed, 26 Aug 2015 11:45:53 +0800 Subject: [PATCH 54/61] finish refactoring io code --- include/mxnet/c_api.h | 10 +- include/mxnet/io.h | 6 +- python/mxnet/io.py | 52 +++++------ python/test_mnist.py | 35 ++++--- src/c_api.cc | 11 +-- src/io/iter_mnist-inl.h | 200 ---------------------------------------- src/io/iter_mnist.cc | 188 ++++++++++++++++++++++++++++++++++++- 7 files changed, 232 insertions(+), 270 deletions(-) delete mode 100644 src/io/iter_mnist-inl.h diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 37a192498ea6..3c1213fafc17 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -461,14 +461,6 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, */ MXNET_DLL int MXListDataIters(mx_uint *out_size, DataIterCreator **out_array); -/*! - * \brief get the name of iterator entry - * \param iter iterator entry - * \param out_name the name of the iterator - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXDataIterGetName(DataIterCreator iter, - const char **out_name); /*! * \brief init an iterator, init with parameters * the array size of passed in arguments @@ -611,7 +603,7 @@ MXNET_DLL int MXDataIterFree(DataIterHandle handle); MXNET_DLL int MXDataIterNext(DataIterHandle handle, int *out); /*! - * \brief call iterator.BeforeFirst + * \brief call iterator.Reset * \param handle the handle to iterator * \return 0 when success, -1 when failure happens */ diff --git a/include/mxnet/io.h b/include/mxnet/io.h index ac22919745a1..47a59eec54fe 100644 --- a/include/mxnet/io.h +++ b/include/mxnet/io.h @@ -24,10 +24,8 @@ class IIterator : public dmlc::DataIter { * \brief set the parameters and init iter * \param kwargs key-value pairs */ - virtual void SetInit(const std::vector >& kwargs) = 0; - /*! \brief initalize the iterator so that we can use the iterator */ - virtual void Init(void) = 0; - /*! 
\brief set before first of the item */ + virtual void Init(const std::vector >& kwargs) = 0; + /*! \brief reset the iterator */ virtual void BeforeFirst(void) = 0; /*! \brief move to next item */ virtual bool Next(void) = 0; diff --git a/python/mxnet/io.py b/python/mxnet/io.py index e4d64efe0268..245476be1397 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -27,31 +27,11 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXDataIterFree(self.handle)) - def __call__(self, *args, **kwargs): - """Invoke iterator as function on inputs. Init params. + def __iter__(self): + """make the class iterable - def __call__(self, *args, **kwargs): - """Invoke iterator as function on inputs. Init params. - - Parameters - --------- - args: - provide positional arguments, should not be given. - - kwargs: - provide keyword arguments - Returns - ------- - the inited iterator """ - if len(args) != 0: - raise TypeError('data iterator only accept \ - keyword arguments') - num_args = len(kwargs) - keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) - vals = c_array(ctypes.c_char_p, [c_str(val) for val in kwargs.values()]) - check_call(_LIB.MXDataIterSetInit( \ - self.handle, num_args, keys, vals)) + return self def beforefirst(self): """set loc to 0 @@ -60,13 +40,31 @@ def beforefirst(self): check_call(_LIB.MXDataIterBeforeFirst(self.handle)) def next(self): - """init dataiter + """get next data from iterator + + Returns + ------- + labels and images for the next batch + """ + next_res = ctypes.c_int(0) + check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) + if next_res.value: + return self.getdata(), self.getlabel() + else: + self.reset() + raise StopIteration + + def iter_next(self): + """iterate to next data with return value + Returns + ------- + return true if success """ next_res = ctypes.c_int(0) check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) return next_res.value - + def getdata(self): """get data 
from batch @@ -129,8 +127,8 @@ def creator(*args, **kwargs): Returns ------- - symbol: Symbol - the resulting symbol + dataiter: Dataiter + the resulting data iterator """ param_keys = [] param_vals = [] diff --git a/python/test_mnist.py b/python/test_mnist.py index 64311a438005..9703a06e549b 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -65,14 +65,14 @@ def Update(mom, grad, weight): block = zip(mom_narrays, grad_narrays, arg_narrays) -train_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", - path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat", seed_data=1) -train_dataiter.beforefirst() -val_dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", - path_label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat") -val_dataiter.beforefirst() +train_dataiter = mx.io.MNISTIter( + image="/home/tianjun/data/mnist/train-images-idx3-ubyte", + label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=0, flat=1, seed=1) +val_dataiter = mx.io.MNISTIter( + image="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", + label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + batch_size=100, shuffle=1, silent=0, flat=1) for i in xrange(epoch): # train @@ -81,10 +81,11 @@ def Update(mom, grad, weight): val_acc = 0.0 train_nbatch = 0 val_nbatch = 0 - while train_dataiter.next(): - data = train_dataiter.getdata() - label = train_dataiter.getlabel().numpy.astype(np.int32) - inputs["data"].numpy[:] = data.numpy + + for data, label in train_dataiter: + data = data.numpy + label = label.numpy.astype(np.int32) + inputs["data"].numpy[:] = data executor.forward() out_narray.numpy[:] = Softmax(out_narray.numpy) train_acc += CalAcc(out_narray.numpy, label) @@ -97,14 +98,12 @@ def Update(mom, grad, weight): Update(mom, grad, weight) # evaluate 
- while val_dataiter.next(): - data = val_dataiter.getdata() - label = val_dataiter.getlabel().numpy.astype(np.int32) - inputs["data"].numpy[:] = data.numpy + for data, label in val_dataiter: + data = data.numpy + label = label.numpy.astype(np.int32) + inputs["data"].numpy[:] = data executor.forward() val_acc += CalAcc(out_narray.numpy, label) val_nbatch += 1 print "Train Acc: ", train_acc / train_nbatch print "Valid Acc: ", val_acc / val_nbatch - train_dataiter.beforefirst() - val_dataiter.beforefirst() diff --git a/src/c_api.cc b/src/c_api.cc index 5965b6a4fab2..4d7381fde0f1 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -624,14 +624,6 @@ int MXListDataIters(mx_uint *out_size, API_END(); } -int MXDataIterGetName(DataIterCreator iter, - const char **out_name) { - API_BEGIN(); - auto *f = static_cast(iter); - *out_name = f->name.c_str(); - API_END(); -} - int MXDataIterGetIterInfo(DataIterCreator creator, const char **name, const char **description, @@ -657,7 +649,8 @@ int MXDataIterCreateIter(DataIterCreator creator, for (int i = 0; i < num_param; ++i) { kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); } - iter->SetInit(kwargs); + iter->Init(kwargs); + iter->BeforeFirst(); *out = iter; API_END_HANDLE_ERROR(delete iter); } diff --git a/src/io/iter_mnist-inl.h b/src/io/iter_mnist-inl.h deleted file mode 100644 index ef2348488396..000000000000 --- a/src/io/iter_mnist-inl.h +++ /dev/null @@ -1,200 +0,0 @@ -/*! - * Copyright (c) 2015 by Contributors - * \file iter_mnist-inl.h - * \brief iterator that takes mnist dataset - */ -#ifndef MXNET_IO_ITER_MNIST_INL_H_ -#define MXNET_IO_ITER_MNIST_INL_H_ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../common/utils.h" - -namespace mxnet { -namespace io { -// Define mnist io parameters -struct MNISTParam : public dmlc::Parameter { - /*! \brief path */ - std::string path_img, path_label; - /*! \brief whether to do shuffle */ - bool shuffle; - /*! 
\brief whether to print info */ - bool silent; - /*! \brief batch size */ - int batch_size; - /*! \brief data mode */ - int input_flat; - /*! \brief random seed */ - int seed_data; - // declare parameters in header file - DMLC_DECLARE_PARAMETER(MNISTParam) { - DMLC_DECLARE_FIELD(path_img).set_default("./train-images-idx3-ubyte") - .describe("Mnist image path."); - DMLC_DECLARE_FIELD(path_label).set_default("./train-labels-idx1-ubyte") - .describe("Mnist label path."); - DMLC_DECLARE_FIELD(shuffle).set_default(false) - .describe("Whether to shuffle data."); - DMLC_DECLARE_FIELD(silent).set_default(false) - .describe("Whether to print out data info."); - DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) - .describe("Batch Size."); - DMLC_DECLARE_FIELD(input_flat).add_enum("flat", 1) - .add_enum("noflat", 0).set_default(1) - .describe("Whether to flat the data into 1D."); - DMLC_DECLARE_FIELD(seed_data).set_default(0) - .describe("Random Seed."); - } -}; - -class MNISTIterator: public IIterator { - public: - MNISTIterator(void) { - img_.dptr_ = NULL; - inst_offset_ = 0; - out_.data.resize(2); - } - virtual ~MNISTIterator(void) { - if (img_.dptr_ != NULL) delete []img_.dptr_; - } - // intialize iterator loads data in - virtual void Init(void) { - this->LoadImage(); - this->LoadLabel(); - // set name - this->SetDataName(std::string("data")); - this->SetDataName(std::string("label")); - if (param.input_flat == 1) { - batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, 1, img_.size(1) * img_.size(2)); - } else { - batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, img_.size(1), img_.size(2)); - } - out_.inst_index = NULL; - batch_label_.shape_ = mshadow::Shape2(param.batch_size, 1); - batch_label_.stride_ = 1; - batch_data_.stride_ = batch_data_.size(3); - out_.batch_size = param.batch_size; - if (param.shuffle) this->Shuffle(); - if (param.silent == 0) { - mshadow::Shape<4> s = batch_data_.shape_; - printf("MNISTIterator: load %u images, 
shuffle=%d, shape=%u,%u,%u,%u\n", - (unsigned)img_.size(0), param.shuffle, s[0], s[1], s[2], s[3]); - } - } - virtual void BeforeFirst(void) { - this->loc_ = 0; - } - virtual bool Next(void) { - if (loc_ + param.batch_size <= img_.size(0)) { - batch_data_.dptr_ = img_[loc_].dptr_; - batch_label_.dptr_ = &labels_[loc_]; - out_.data[0] = TBlob(batch_data_); - out_.data[1] = TBlob(batch_label_); - out_.inst_index = &inst_[loc_]; - loc_ += param.batch_size; - return true; - } else { - return false; - } - } - virtual const DataBatch &Value(void) const { - return out_; - } - virtual void SetInit(const std::vector >& kwargs) { - std::map kmap(kwargs.begin(), kwargs.end()); - param.Init(kmap); - this->Init(); - } - - private: - inline void LoadImage(void) { - dmlc::Stream *stdimg = dmlc::Stream::Create(param.path_img.c_str(), "r"); - ReadInt(stdimg); - int image_count = ReadInt(stdimg); - int image_rows = ReadInt(stdimg); - int image_cols = ReadInt(stdimg); - - img_.shape_ = mshadow::Shape3(image_count, image_rows, image_cols); - img_.stride_ = img_.size(2); - - // allocate continuous memory - img_.dptr_ = new float[img_.MSize()]; - for (int i = 0; i < image_count; ++i) { - for (int j = 0; j < image_rows; ++j) { - for (int k = 0; k < image_cols; ++k) { - unsigned char ch; - CHECK(stdimg->Read(&ch, sizeof(ch) != 0)); - img_[i][j][k] = ch; - } - } - } - // normalize to 0-1 - img_ *= 1.0f / 256.0f; - delete stdimg; - } - inline void LoadLabel(void) { - dmlc::Stream *stdlabel = dmlc::Stream::Create(param.path_label.c_str(), "r"); - ReadInt(stdlabel); - int labels_count = ReadInt(stdlabel); - labels_.resize(labels_count); - for (int i = 0; i < labels_count; ++i) { - unsigned char ch; - CHECK(stdlabel->Read(&ch, sizeof(ch) != 0)); - labels_[i] = ch; - inst_.push_back((unsigned)i + inst_offset_); - } - delete stdlabel; - } - inline void Shuffle(void) { - std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param.seed_data)); - std::vector 
tmplabel(labels_.size()); - mshadow::TensorContainer tmpimg(img_.shape_); - for (size_t i = 0; i < inst_.size(); ++i) { - unsigned ridx = inst_[i] - inst_offset_; - mshadow::Copy(tmpimg[i], img_[ridx]); - tmplabel[i] = labels_[ridx]; - } - // copy back - mshadow::Copy(img_, tmpimg); - labels_ = tmplabel; - } - - private: - inline static int ReadInt(dmlc::Stream *fi) { - unsigned char buf[4]; - CHECK(fi->Read(buf, sizeof(buf)) == sizeof(buf)) - << "invalid mnist format"; - return reinterpret_cast(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); - } - - private: - /*! \brief MNIST iter params */ - MNISTParam param; - /*! \brief output */ - DataBatch out_; - /*! \brief current location */ - index_t loc_; - /*! \brief image content */ - mshadow::Tensor img_; - /*! \brief label content */ - std::vector labels_; - /*! \brief batch data tensor */ - mshadow::Tensor batch_data_; - /*! \brief batch label tensor */ - mshadow::Tensor batch_label_; - /*! \brief instance index offset */ - unsigned inst_offset_; - /*! \brief instance index */ - std::vector inst_; - // magic number to setup randomness - static const int kRandMagic = 0; -}; // class MNISTIterator -} // namespace io -} // namespace mxnet -#endif // MXNET_IO_ITER_MNIST_INL_H_ diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 3ddda17a10af..b48064de184a 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,14 +4,196 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include "./iter_mnist-inl.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../common/utils.h" namespace mxnet { namespace io { +// Define mnist io parameters +struct MNISTParam : public dmlc::Parameter { + /*! \brief path */ + std::string image, label; + /*! \brief whether to do shuffle */ + bool shuffle; + /*! \brief whether to print info */ + bool silent; + /*! \brief batch size */ + int batch_size; + /*! \brief data mode */ + bool flat; + /*! 
\brief random seed */ + int seed; + // declare parameters in header file + DMLC_DECLARE_PARAMETER(MNISTParam) { + DMLC_DECLARE_FIELD(image).set_default("./train-images-idx3-ubyte") + .describe("Mnist image path."); + DMLC_DECLARE_FIELD(label).set_default("./train-labels-idx1-ubyte") + .describe("Mnist label path."); + DMLC_DECLARE_FIELD(shuffle).set_default(false) + .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(silent).set_default(false) + .describe("Whether to print out data info."); + DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) + .describe("Batch Size."); + DMLC_DECLARE_FIELD(flat).set_default(true) + .describe("Whether to flat the data into 1D."); + DMLC_DECLARE_FIELD(seed).set_default(0) + .describe("Random Seed."); + } +}; + +class MNISTIter: public IIterator { + public: + MNISTIter(void) { + img_.dptr_ = NULL; + inst_offset_ = 0; + out_.data.resize(2); + } + virtual ~MNISTIter(void) { + if (img_.dptr_ != NULL) delete []img_.dptr_; + } + // intialize iterator loads data in + virtual void Init(const std::vector >& kwargs) { + std::map kmap(kwargs.begin(), kwargs.end()); + param.Init(kmap); + this->LoadImage(); + this->LoadLabel(); + // set name + this->SetDataName(std::string("data")); + this->SetDataName(std::string("label")); + if (param.flat) { + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, 1, img_.size(1) * img_.size(2)); + } else { + batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, img_.size(1), img_.size(2)); + } + out_.inst_index = NULL; + batch_label_.shape_ = mshadow::Shape2(param.batch_size, 1); + batch_label_.stride_ = 1; + batch_data_.stride_ = batch_data_.size(3); + out_.batch_size = param.batch_size; + if (param.shuffle) this->Shuffle(); + if (param.silent == 0) { + mshadow::Shape<4> s = batch_data_.shape_; + printf("MNISTIter: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", + (unsigned)img_.size(0), param.shuffle, s[0], s[1], s[2], s[3]); + } + } + virtual void BeforeFirst(void) { + 
this->loc_ = 0; + } + virtual bool Next(void) { + if (loc_ + param.batch_size <= img_.size(0)) { + batch_data_.dptr_ = img_[loc_].dptr_; + batch_label_.dptr_ = &labels_[loc_]; + out_.data[0] = TBlob(batch_data_); + out_.data[1] = TBlob(batch_label_); + out_.inst_index = &inst_[loc_]; + loc_ += param.batch_size; + return true; + } else { + return false; + } + } + virtual const DataBatch &Value(void) const { + return out_; + } + + private: + inline void LoadImage(void) { + dmlc::Stream *stdimg = dmlc::Stream::Create(param.image.c_str(), "r"); + ReadInt(stdimg); + int image_count = ReadInt(stdimg); + int image_rows = ReadInt(stdimg); + int image_cols = ReadInt(stdimg); + + img_.shape_ = mshadow::Shape3(image_count, image_rows, image_cols); + img_.stride_ = img_.size(2); + + // allocate continuous memory + img_.dptr_ = new float[img_.MSize()]; + for (int i = 0; i < image_count; ++i) { + for (int j = 0; j < image_rows; ++j) { + for (int k = 0; k < image_cols; ++k) { + unsigned char ch; + CHECK(stdimg->Read(&ch, sizeof(ch) != 0)); + img_[i][j][k] = ch; + } + } + } + // normalize to 0-1 + img_ *= 1.0f / 256.0f; + delete stdimg; + } + inline void LoadLabel(void) { + dmlc::Stream *stdlabel = dmlc::Stream::Create(param.label.c_str(), "r"); + ReadInt(stdlabel); + int labels_count = ReadInt(stdlabel); + labels_.resize(labels_count); + for (int i = 0; i < labels_count; ++i) { + unsigned char ch; + CHECK(stdlabel->Read(&ch, sizeof(ch) != 0)); + labels_[i] = ch; + inst_.push_back((unsigned)i + inst_offset_); + } + delete stdlabel; + } + inline void Shuffle(void) { + std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param.seed)); + std::vector tmplabel(labels_.size()); + mshadow::TensorContainer tmpimg(img_.shape_); + for (size_t i = 0; i < inst_.size(); ++i) { + unsigned ridx = inst_[i] - inst_offset_; + mshadow::Copy(tmpimg[i], img_[ridx]); + tmplabel[i] = labels_[ridx]; + } + // copy back + mshadow::Copy(img_, tmpimg); + labels_ = tmplabel; + } + + 
private: + inline static int ReadInt(dmlc::Stream *fi) { + unsigned char buf[4]; + CHECK(fi->Read(buf, sizeof(buf)) == sizeof(buf)) + << "invalid mnist format"; + return reinterpret_cast(buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]); + } + + private: + /*! \brief MNIST iter params */ + MNISTParam param; + /*! \brief output */ + DataBatch out_; + /*! \brief current location */ + index_t loc_; + /*! \brief image content */ + mshadow::Tensor img_; + /*! \brief label content */ + std::vector labels_; + /*! \brief batch data tensor */ + mshadow::Tensor batch_data_; + /*! \brief batch label tensor */ + mshadow::Tensor batch_label_; + /*! \brief instance index offset */ + unsigned inst_offset_; + /*! \brief instance index */ + std::vector inst_; + // magic number to setup randomness + static const int kRandMagic = 0; +}; // class MNISTIter DMLC_REGISTER_PARAMETER(MNISTParam); -MXNET_REGISTER_IO_ITER(MNISTIterator, MNISTIterator) - .describe("Create MNISTIterator") +MXNET_REGISTER_IO_ITER(MNISTIter, MNISTIter) + .describe("Create MNISTIter") .add_arguments(MNISTParam::__FIELDS__()); } // namespace io } // namespace mxnet From 08f12b8cb6c189ed7f84bea0c3124e7ab5f61d16 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Wed, 26 Aug 2015 11:59:30 +0800 Subject: [PATCH 55/61] add io.md in doc --- python/mxnet/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 245476be1397..f5b4e1f2f43b 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -41,7 +41,7 @@ def beforefirst(self): def next(self): """get next data from iterator - + Returns ------- labels and images for the next batch @@ -64,7 +64,7 @@ def iter_next(self): next_res = ctypes.c_int(0) check_call(_LIB.MXDataIterNext(self.handle, ctypes.byref(next_res))) return next_res.value - + def getdata(self): """get data from batch From 9bdcebd59807e53f164b61976252bfed56ea9f9f Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Wed, 26 Aug 2015 22:54:08 +0800 
Subject: [PATCH 56/61] merged latest master --- python/mxnet/io.py | 4 +- python/test_io.py | 20 ---------- python/test_mnist.py | 95 ++++++++++++++++++++++++++++++-------------- src/io/io.cc | 7 ---- 4 files changed, 68 insertions(+), 58 deletions(-) delete mode 100644 python/test_io.py diff --git a/python/mxnet/io.py b/python/mxnet/io.py index f5b4e1f2f43b..81c0128ef965 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -33,14 +33,14 @@ def __iter__(self): """ return self - def beforefirst(self): + def reset(self): """set loc to 0 """ check_call(_LIB.MXDataIterBeforeFirst(self.handle)) def next(self): - """get next data from iterator + """get next data batch from iterator Returns ------- diff --git a/python/test_io.py b/python/test_io.py deleted file mode 100644 index 940a4baa5c0d..000000000000 --- a/python/test_io.py +++ /dev/null @@ -1,20 +0,0 @@ -#pylint: skip-file -import mxnet as mx -import numpy as np -import os - -dataiter = mx.io.MNISTIterator(path_img="/home/tianjun/data/mnist/train-images-idx3-ubyte", - path_label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=1, input_flat="flat") - -dataiter.beforefirst() - -idx = 0 -while dataiter.next(): - info = "Batch %d" % (idx) - idx += 1 - print info - ''' - label = dataiter.getlabel() - print label.numpy - ''' diff --git a/python/test_mnist.py b/python/test_mnist.py index 9703a06e549b..9a434b55540a 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -14,13 +14,52 @@ def Softmax(x): def CalAcc(out, label): pred = np.argmax(out, axis=1) - return np.sum(pred == label.transpose()) * 1.0 / out.shape[0] + return np.sum(pred == label) * 1.0 / out.shape[0] + + +# load data +class MNISTIter(object): + def __init__(self, which_set, batch_size=100, flatten=True): + if not os.path.exists('mnist.pkl.gz'): + os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") + f = gzip.open('mnist.pkl.gz', 'rb') + train_set, valid_set, test_set = cPickle.load(f) + 
f.close() + if which_set == 'train': + self.data = train_set[0] + self.label = np.asarray(train_set[1]) + elif which_set == 'valid': + self.data = valid_set[0] + self.label = np.asarray(valid_set[1]) + else: + self.data = test_set[0] + self.data = np.asarray(test_set[1]) + self.flatten = flatten + self.batch_size = batch_size + self.nbatch = self.data.shape[0] / batch_size + assert(self.data.shape[0] % batch_size == 0) # I am lazy + self.now_idx = -1 + def BeforeFirst(self): + self.now_idx = -1 + def Next(self): + self.now_idx += 1 + if self.now_idx == self.nbatch: + return False + return True + def Get(self): + if self.now_idx < 0: + raise Exception("Iterator is at head") + elif self.now_idx >= self.nbatch: + raise Exception("Iterator is at end") + start = self.now_idx * self.batch_size + end = (self.now_idx + 1) * self.batch_size + if self.flatten: + return (self.data[start:end, :], self.label[start:end]) + else: + return (self.data[start:end, :].reshape(batch_size, 1, 28, 28), + self.label[start:end]) + -def SetGradient(out_grad, label): - assert(out_grad.shape[0] == label.shape[0]) - for i in xrange(label.shape[0]): - k = label[i] - out_grad[i][k] -= 1.0 # symbol net batch_size = 100 @@ -30,8 +69,10 @@ def SetGradient(out_grad, label): fc2 = mx.symbol.FullyConnected(data = act1, name='fc2', num_hidden=10) args_list = fc2.list_arguments() # infer shape -data_shape = (batch_size, 1, 1, 784) -arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) +#data_shape = (batch_size, 784) + +data_shape = (batch_size, 1, 28, 28) +arg_shapes, out_shapes = softmax.infer_shape(data=data_shape) arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] mom_narrays = [mx.narray.create(shape) for shape in arg_shapes] @@ -65,31 +106,24 @@ def Update(mom, grad, weight): block = zip(mom_narrays, grad_narrays, arg_narrays) -train_dataiter = mx.io.MNISTIter( - 
image="/home/tianjun/data/mnist/train-images-idx3-ubyte", - label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=0, flat=1, seed=1) -val_dataiter = mx.io.MNISTIter( - image="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", - label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", - batch_size=100, shuffle=1, silent=0, flat=1) +train = MNISTIter("train", batch_size, False) +valid = MNISTIter("valid", batch_size, False) for i in xrange(epoch): # train print "Epoch %d" % i train_acc = 0.0 val_acc = 0.0 - train_nbatch = 0 - val_nbatch = 0 - - for data, label in train_dataiter: - data = data.numpy - label = label.numpy.astype(np.int32) + while train.Next(): + data, label = train.Get() + print np.shape(data) + print np.shape(label) + exit(0) inputs["data"].numpy[:] = data + inputs["sm_label"].numpy[:] = label executor.forward() out_narray.numpy[:] = Softmax(out_narray.numpy) train_acc += CalAcc(out_narray.numpy, label) - train_nbatch += 1 grad_narray.numpy[:] = out_narray.numpy SetGradient(grad_narray.numpy, label) executor.backward([grad_narray]) @@ -98,12 +132,15 @@ def Update(mom, grad, weight): Update(mom, grad, weight) # evaluate - for data, label in val_dataiter: - data = data.numpy - label = label.numpy.astype(np.int32) + while valid.Next(): + data, label = valid.Get() inputs["data"].numpy[:] = data executor.forward() val_acc += CalAcc(out_narray.numpy, label) - val_nbatch += 1 - print "Train Acc: ", train_acc / train_nbatch - print "Valid Acc: ", val_acc / val_nbatch + print "Train Acc: ", train_acc / train.nbatch + print "Valid Acc: ", val_acc / valid.nbatch + train.BeforeFirst() + valid.BeforeFirst() + + + diff --git a/src/io/io.cc b/src/io/io.cc index fb7a8c2d3092..bd5b78dda643 100644 --- a/src/io/io.cc +++ b/src/io/io.cc @@ -3,14 +3,7 @@ #define _CRT_SECURE_NO_DEPRECATE #include -#include -#include #include -#include -#include -#include -#include -#include "iter_mnist-inl.h" namespace dmlc { 
DMLC_REGISTRY_ENABLE(::mxnet::DataIteratorReg); From e68452f576a3ae52c263e686d4a8e6f73e9fdc22 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Thu, 27 Aug 2015 13:56:45 +0800 Subject: [PATCH 57/61] add local io into test --- Makefile | 2 +- include/mxnet/base.h | 2 - include/mxnet/c_api.h | 26 +++---- python/mxnet/io.py | 6 +- python/test_mnist.py | 146 -------------------------------------- src/c_api.cc | 1 - src/io/iter_mnist.cc | 58 ++++++++------- test/io_mnist_test.cc | 96 ------------------------- tests/python/test_conv.py | 87 ++++++----------------- tests/python/test_mlp.py | 87 ++++++----------------- 10 files changed, 95 insertions(+), 416 deletions(-) delete mode 100644 python/test_mnist.py delete mode 100644 test/io_mnist_test.cc diff --git a/Makefile b/Makefile index bdcbe56a1d24..bdebed0b5ae6 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,7 @@ endif #BIN = test/test_threaded_engine test/api_registry_test OBJ = narray_function_cpu.o # add threaded engine after it is done -OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o io.o iter-mnist.o +OBJCXX11 = reshape_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o io.o iter_mnist.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a diff --git a/include/mxnet/base.h b/include/mxnet/base.h index 04b634ff8b87..a7a3a8063a92 100644 --- a/include/mxnet/base.h +++ b/include/mxnet/base.h @@ -55,8 +55,6 @@ typedef mshadow::TBlob TBlob; namespace dmlc { // Add a few patches to support TShape in dmlc/parameter. 
DMLC_DECLARE_TYPE_NAME(mxnet::TShape, "Shape(tuple)"); -DMLC_DECLARE_TYPE_NAME(uint32_t, "unsigned int"); - namespace parameter { template<> diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 3c1213fafc17..65dfc9473999 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -454,7 +454,7 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, // Part 5: IO Interface //-------------------------------------------- /*! - * \brief list all the available iterator entries + * \brief List all the available iterator entries * \param out_size the size of returned iterators * \param out_array the output iteratos entries * \return 0 when success, -1 when failure happens @@ -462,7 +462,7 @@ MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, MXNET_DLL int MXListDataIters(mx_uint *out_size, DataIterCreator **out_array); /*! - * \brief init an iterator, init with parameters + * \brief Init an iterator, init with parameters * the array size of passed in arguments * \param handle of the iterator creator * \param num_param number of parameter @@ -495,13 +495,13 @@ MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, const char ***arg_type_infos, const char ***arg_descriptions); /*! - * \brief free the handle to the IO module + * \brief Free the handle to the IO module * \param handle the handle pointer to the data iterator * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! - * \brief get the name of iterator entry + * \brief Get the name of iterator entry * \param iter iterator entry * \param out_name the name of the iterator * \return 0 when success, -1 when failure happens @@ -509,7 +509,7 @@ MXNET_DLL int MXDataIterFree(DataIterHandle handle); MXNET_DLL int MXDataIterGetName(DataIterCreator iter, const char **out_name); /*! 
- * \brief init an iterator, init with parameters + * \brief Init an iterator, init with parameters * the array size of passed in arguments * \param handle of the iterator creator * \param num_param number of parameter @@ -542,13 +542,13 @@ MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, const char ***arg_type_infos, const char ***arg_descriptions); /*! - * \brief free the handle to the IO module + * \brief Free the handle to the IO module * \param handle the handle pointer to the data iterator * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! - * \brief get the name of iterator entry + * \brief Get the name of iterator entry * \param iter iterator entry * \param out_name the name of the iterator * \return 0 when success, -1 when failure happens @@ -556,7 +556,7 @@ MXNET_DLL int MXDataIterFree(DataIterHandle handle); MXNET_DLL int MXDataIterGetName(DataIterCreator iter, const char **out_name); /*! - * \brief init an iterator, init with parameters + * \brief Init an iterator, init with parameters * the array size of passed in arguments * \param handle of the iterator creator * \param num_param number of parameter @@ -589,13 +589,13 @@ MXNET_DLL int MXDataIterGetIterInfo(AtomicSymbolCreator creator, const char ***arg_type_infos, const char ***arg_descriptions); /*! - * \brief free the handle to the IO module + * \brief Free the handle to the IO module * \param handle the handle pointer to the data iterator * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXDataIterFree(DataIterHandle handle); /*! - * \brief move iterator to next position + * \brief Move iterator to next position * \param handle the handle to iterator * \param out return value of next * \return 0 when success, -1 when failure happens @@ -603,14 +603,14 @@ MXNET_DLL int MXDataIterFree(DataIterHandle handle); MXNET_DLL int MXDataIterNext(DataIterHandle handle, int *out); /*! 
- * \brief call iterator.Reset + * \brief Call iterator.Reset * \param handle the handle to iterator * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXDataIterBeforeFirst(DataIterHandle handle); /*! - * \brief get the handle to the NArray of underlying data + * \brief Get the handle to the NArray of underlying data * \param handle the handle pointer to the data iterator * \param out handle to underlying data NArray * \return 0 when success, -1 when failure happens @@ -618,7 +618,7 @@ MXNET_DLL int MXDataIterBeforeFirst(DataIterHandle handle); MXNET_DLL int MXDataIterGetData(DataIterHandle handle, NArrayHandle *out); /*! - * \brief get the handle to the NArray of underlying label + * \brief Get the handle to the NArray of underlying label * \param handle the handle pointer to the data iterator * \param out the handle to underlying label NArray * \return 0 when success, -1 when failure happens diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 81c0128ef965..633fa06bb6d3 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -6,7 +6,7 @@ import ctypes import sys from .base import _LIB -from .base import c_array, c_str, mx_uint +from .base import c_array, c_str, mx_uint, py_str from .base import DataIterHandle, NArrayHandle from .base import check_call from .narray import NArray @@ -96,12 +96,12 @@ def _make_io_iterator(handle): ctypes.byref(arg_names), \ ctypes.byref(arg_types), \ ctypes.byref(arg_descs))) - iter_name = name.value + iter_name = py_str(name.value) param_str = [] for i in range(num_args.value): ret = '%s : %s' % (arg_names[i], arg_types[i]) if len(arg_descs[i]) != 0: - ret += '\n ' + arg_descs[i] + ret += '\n ' + py_str(arg_descs[i]) param_str.append(ret) doc_str = ('%s\n\n' + diff --git a/python/test_mnist.py b/python/test_mnist.py deleted file mode 100644 index 9a434b55540a..000000000000 --- a/python/test_mnist.py +++ /dev/null @@ -1,146 +0,0 @@ -# pylint: skip-file -import mxnet as mx -import numpy as np -import os, 
cPickle, gzip - -def Softmax(x): - batch, nidden = x.shape - maxes = np.max(x, axis=1) - x -= maxes.reshape(batch, 1) - x = np.exp(x) - norm = np.sum(x, axis=1) - prob = x / norm.reshape((batch, 1)) - return prob - -def CalAcc(out, label): - pred = np.argmax(out, axis=1) - return np.sum(pred == label) * 1.0 / out.shape[0] - - -# load data -class MNISTIter(object): - def __init__(self, which_set, batch_size=100, flatten=True): - if not os.path.exists('mnist.pkl.gz'): - os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") - f = gzip.open('mnist.pkl.gz', 'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - if which_set == 'train': - self.data = train_set[0] - self.label = np.asarray(train_set[1]) - elif which_set == 'valid': - self.data = valid_set[0] - self.label = np.asarray(valid_set[1]) - else: - self.data = test_set[0] - self.data = np.asarray(test_set[1]) - self.flatten = flatten - self.batch_size = batch_size - self.nbatch = self.data.shape[0] / batch_size - assert(self.data.shape[0] % batch_size == 0) # I am lazy - self.now_idx = -1 - def BeforeFirst(self): - self.now_idx = -1 - def Next(self): - self.now_idx += 1 - if self.now_idx == self.nbatch: - return False - return True - def Get(self): - if self.now_idx < 0: - raise Exception("Iterator is at head") - elif self.now_idx >= self.nbatch: - raise Exception("Iterator is at end") - start = self.now_idx * self.batch_size - end = (self.now_idx + 1) * self.batch_size - if self.flatten: - return (self.data[start:end, :], self.label[start:end]) - else: - return (self.data[start:end, :].reshape(batch_size, 1, 28, 28), - self.label[start:end]) - - - -# symbol net -batch_size = 100 -data = mx.symbol.Variable('data') -fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=160) -act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu") -fc2 = mx.symbol.FullyConnected(data = act1, name='fc2', num_hidden=10) -args_list = fc2.list_arguments() -# infer shape -#data_shape 
= (batch_size, 784) - -data_shape = (batch_size, 1, 28, 28) -arg_shapes, out_shapes = softmax.infer_shape(data=data_shape) -arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] -grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] -mom_narrays = [mx.narray.create(shape) for shape in arg_shapes] -inputs = dict(zip(args_list, arg_narrays)) - -np.random.seed(0) -# set random weight -for name, narray in inputs.items(): - if "weight" in name: - narray.numpy[:, :] = np.random.uniform(-0.001, 0.001, narray.numpy.shape) - if "bias" in name: - narray.numpy[:] = 0.0 - -req = ['write_to' for i in range(len(arg_narrays))] -# bind executer -# TODO(bing): think of a better bind interface -executor = fc2.bind(mx.Context('cpu'), arg_narrays, grad_narrays, req) -# update - -out_narray = executor.heads()[0] -grad_narray = mx.narray.create(out_narray.shape) - -epoch = 10 -momentum = 0.9 -lr = 0.001 -wd = 0.0004 - -def Update(mom, grad, weight): - weight.numpy[:] -= lr * grad.numpy[:] - -block = zip(mom_narrays, grad_narrays, arg_narrays) - - -train = MNISTIter("train", batch_size, False) -valid = MNISTIter("valid", batch_size, False) - -for i in xrange(epoch): - # train - print "Epoch %d" % i - train_acc = 0.0 - val_acc = 0.0 - while train.Next(): - data, label = train.Get() - print np.shape(data) - print np.shape(label) - exit(0) - inputs["data"].numpy[:] = data - inputs["sm_label"].numpy[:] = label - executor.forward() - out_narray.numpy[:] = Softmax(out_narray.numpy) - train_acc += CalAcc(out_narray.numpy, label) - grad_narray.numpy[:] = out_narray.numpy - SetGradient(grad_narray.numpy, label) - executor.backward([grad_narray]) - - for mom, grad, weight in block: - Update(mom, grad, weight) - - # evaluate - while valid.Next(): - data, label = valid.Get() - inputs["data"].numpy[:] = data - executor.forward() - val_acc += CalAcc(out_narray.numpy, label) - print "Train Acc: ", train_acc / train.nbatch - print "Valid Acc: ", val_acc / valid.nbatch - 
train.BeforeFirst() - valid.BeforeFirst() - - - diff --git a/src/c_api.cc b/src/c_api.cc index 4d7381fde0f1..0d496c3855bf 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index b48064de184a..a6336e59afee 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -4,7 +4,6 @@ * \brief register mnist iterator * \author Tianjun Xiao */ -#include #include #include #include @@ -38,14 +37,14 @@ struct MNISTParam : public dmlc::Parameter { .describe("Mnist image path."); DMLC_DECLARE_FIELD(label).set_default("./train-labels-idx1-ubyte") .describe("Mnist label path."); - DMLC_DECLARE_FIELD(shuffle).set_default(false) + DMLC_DECLARE_FIELD(batch_size).set_lower_bound(1).set_default(128) + .describe("Batch Size."); + DMLC_DECLARE_FIELD(shuffle).set_default(true) .describe("Whether to shuffle data."); + DMLC_DECLARE_FIELD(flat).set_default(false) + .describe("Whether to flat the data into 1D."); DMLC_DECLARE_FIELD(silent).set_default(false) .describe("Whether to print out data info."); - DMLC_DECLARE_FIELD(batch_size).set_range(1, 100000).set_default(128) - .describe("Batch Size."); - DMLC_DECLARE_FIELD(flat).set_default(true) - .describe("Whether to flat the data into 1D."); DMLC_DECLARE_FIELD(seed).set_default(0) .describe("Random Seed."); } @@ -64,40 +63,49 @@ class MNISTIter: public IIterator { // intialize iterator loads data in virtual void Init(const std::vector >& kwargs) { std::map kmap(kwargs.begin(), kwargs.end()); - param.Init(kmap); + param_.Init(kmap); this->LoadImage(); this->LoadLabel(); // set name this->SetDataName(std::string("data")); this->SetDataName(std::string("label")); - if (param.flat) { - batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, 1, img_.size(1) * img_.size(2)); + if (param_.flat) { + batch_data_.shape_ = mshadow::Shape4(param_.batch_size, 1, 1, img_.size(1) * img_.size(2)); } else { - 
batch_data_.shape_ = mshadow::Shape4(param.batch_size, 1, img_.size(1), img_.size(2)); + batch_data_.shape_ = mshadow::Shape4(param_.batch_size, 1, img_.size(1), img_.size(2)); } out_.inst_index = NULL; - batch_label_.shape_ = mshadow::Shape2(param.batch_size, 1); + batch_label_.shape_ = mshadow::Shape2(param_.batch_size, 1); batch_label_.stride_ = 1; batch_data_.stride_ = batch_data_.size(3); - out_.batch_size = param.batch_size; - if (param.shuffle) this->Shuffle(); - if (param.silent == 0) { + out_.batch_size = param_.batch_size; + if (param_.shuffle) this->Shuffle(); + if (param_.silent == 0) { mshadow::Shape<4> s = batch_data_.shape_; - printf("MNISTIter: load %u images, shuffle=%d, shape=%u,%u,%u,%u\n", - (unsigned)img_.size(0), param.shuffle, s[0], s[1], s[2], s[3]); + if (param_.flat) { + LOG(INFO) << "MNISTIter: load " << (unsigned)img_.size(0) << " images, shuffle=" + << param_.shuffle << ", shape=" << s[0] << "," << s[3]; + } else { + LOG(INFO) << "MNISTIter: load " << (unsigned)img_.size(0) << " images, shuffle=" + << param_.shuffle << ", shape=" << s[0] << "," << s[1] << "," << s[2] << "," + << s[3]; + } } } virtual void BeforeFirst(void) { this->loc_ = 0; } virtual bool Next(void) { - if (loc_ + param.batch_size <= img_.size(0)) { + if (loc_ + param_.batch_size <= img_.size(0)) { batch_data_.dptr_ = img_[loc_].dptr_; batch_label_.dptr_ = &labels_[loc_]; - out_.data[0] = TBlob(batch_data_); + if (param_.flat) + out_.data[0] = TBlob(batch_data_.FlatTo2D()); + else + out_.data[0] = TBlob(batch_data_); out_.data[1] = TBlob(batch_label_); out_.inst_index = &inst_[loc_]; - loc_ += param.batch_size; + loc_ += param_.batch_size; return true; } else { return false; @@ -109,7 +117,7 @@ class MNISTIter: public IIterator { private: inline void LoadImage(void) { - dmlc::Stream *stdimg = dmlc::Stream::Create(param.image.c_str(), "r"); + dmlc::Stream *stdimg = dmlc::Stream::Create(param_.image.c_str(), "r"); ReadInt(stdimg); int image_count = ReadInt(stdimg); int 
image_rows = ReadInt(stdimg); @@ -134,7 +142,7 @@ class MNISTIter: public IIterator { delete stdimg; } inline void LoadLabel(void) { - dmlc::Stream *stdlabel = dmlc::Stream::Create(param.label.c_str(), "r"); + dmlc::Stream *stdlabel = dmlc::Stream::Create(param_.label.c_str(), "r"); ReadInt(stdlabel); int labels_count = ReadInt(stdlabel); labels_.resize(labels_count); @@ -147,7 +155,7 @@ class MNISTIter: public IIterator { delete stdlabel; } inline void Shuffle(void) { - std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param.seed)); + std::shuffle(inst_.begin(), inst_.end(), common::RANDOM_ENGINE(kRandMagic+param_.seed)); std::vector tmplabel(labels_.size()); mshadow::TensorContainer tmpimg(img_.shape_); for (size_t i = 0; i < inst_.size(); ++i) { @@ -170,7 +178,7 @@ class MNISTIter: public IIterator { private: /*! \brief MNIST iter params */ - MNISTParam param; + MNISTParam param_; /*! \brief output */ DataBatch out_; /*! \brief current location */ @@ -193,7 +201,9 @@ class MNISTIter: public IIterator { DMLC_REGISTER_PARAMETER(MNISTParam); MXNET_REGISTER_IO_ITER(MNISTIter, MNISTIter) - .describe("Create MNISTIter") + .describe("Create data iterator for MNIST hand-written digit number \ + recogonition dataset, which include 50000 training images and \ + 10000 testing images. 
All images are 28 * 28 gray-scaled.") .add_arguments(MNISTParam::__FIELDS__()); } // namespace io } // namespace mxnet diff --git a/test/io_mnist_test.cc b/test/io_mnist_test.cc deleted file mode 100644 index 2bfba24a507a..000000000000 --- a/test/io_mnist_test.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2015 by Contributors -// IO test code - -#include -#include -#include -#include -#include -#include "mxnet/io.h" -#include "../src/io/iter_mnist-inl.h" - -using namespace std; -using namespace mxnet; -using namespace dmlc; - -void InitIter(IIterator* itr, - const std::vector< std::pair< std::string, std::string> > &defcfg) { - for (size_t i = 0; i < defcfg.size(); ++i) { - itr->SetParam(defcfg[i].first.c_str(), defcfg[i].second.c_str()); - } - itr->Init(); -} - -IIterator* CreateIterators( - const std::vector< std::pair< std::string, std::string> >& cfg) { - IIterator* data_itr = NULL; - int flag = 0; - std::string evname; - std::vector< std::pair< std::string, std::string> > itcfg; - std::vector< std::pair< std::string, std::string> > defcfg; - for (size_t i = 0; i < cfg.size(); ++i) { - const char *name = cfg[i].first.c_str(); - const char *val = cfg[i].second.c_str(); - if (!strcmp(name, "data")) { - flag = 1; continue; - } - if (!strcmp(name, "eval")) { - flag = 2; continue; - } - if (!strcmp(name, "pred")) { - flag = 3; continue; - } - if (!strcmp(name, "iterend") && !strcmp(val, "true")) { - if (flag == 1) { - data_itr = mxnet::CreateIterator(itcfg); - } - flag = 0; itcfg.clear(); - } - if (flag == 0) { - defcfg.push_back(cfg[i]); - } else { - itcfg.push_back(cfg[i]); - } - } - if (data_itr != NULL) { - InitIter(data_itr, defcfg); - } - return data_itr; -} - -/*! 
- * Usage: ./io_mnist_test /path/to/io_config/file - * Example - * data = train - * iter = mnist - * path_img = "./data/mnist/train-images-idx3-ubyte" - * path_label = "./data/mnist/train-labels-idx1-ubyte" - * shuffle = 1 - * iterend = true - * input_shape = 1,1,784 - * batch_size = 100 - * - */ - -int main(int argc, char** argv) { - std::ifstream ifs(argv[1], std::ifstream::in); - std::vector< std::pair< std::string, std::string> > itcfg; - Config cfg(ifs); - for (Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { - Config::ConfigEntry ent = *iter; - itcfg.push_back(std::make_pair(ent.first, ent.second)); - } - // Get the data and init - IIterator* data_itr = CreateIterators(itcfg); - data_itr->BeforeFirst(); - int batch_dir = 0; - while (data_itr->Next()) { - std::cout << "Label of Batch " << batch_dir++ << std::endl; - // print label - DataBatch db = data_itr->Value(); - mshadow::Tensor label = db.data[1].get(); - for (size_t i = 0; i < label.shape_.shape_[0]; i++) - std::cout << label.dptr_[i] << " "; - std::cout << "\n"; - } -} diff --git a/tests/python/test_conv.py b/tests/python/test_conv.py index a456b19982f8..4ba7963bb17d 100644 --- a/tests/python/test_conv.py +++ b/tests/python/test_conv.py @@ -9,58 +9,6 @@ def CalAcc(out, label): pred = np.argmax(out, axis=1) return np.sum(pred == label) * 1.0 / out.shape[0] -def IgnorePython3(): - if sys.version_info[0] >= 3: - # TODO(tianjun): use IO instead of pickle - # Python3 pickle is not able to load data correctly - sys.exit(0) - - -# load data -class MNISTIter(object): - def __init__(self, which_set, batch_size=100, flatten=True): - if not os.path.exists('mnist.pkl.gz'): - os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") - f = gzip.open('mnist.pkl.gz', 'rb') - IgnorePython3() - train_set, valid_set, test_set = pickle.load(f) - f.close() - if which_set == 'train': - self.data = train_set[0] - self.label = np.asarray(train_set[1]) - elif which_set == 'valid': - self.data = 
valid_set[0] - self.label = np.asarray(valid_set[1]) - else: - self.data = test_set[0] - self.data = np.asarray(test_set[1]) - self.flatten = flatten - self.batch_size = batch_size - self.nbatch = self.data.shape[0] / batch_size - assert(self.data.shape[0] % batch_size == 0) # I am lazy - self.now_idx = -1 - def BeforeFirst(self): - self.now_idx = -1 - def Next(self): - self.now_idx += 1 - if self.now_idx == self.nbatch: - return False - return True - def Get(self): - if self.now_idx < 0: - raise Exception("Iterator is at head") - elif self.now_idx >= self.nbatch: - raise Exception("Iterator is at end") - start = self.now_idx * self.batch_size - end = (self.now_idx + 1) * self.batch_size - if self.flatten: - return (self.data[start:end, :], self.label[start:end]) - else: - return (self.data[start:end, :].reshape(batch_size, 1, 28, 28), - self.label[start:end]) - - - # symbol net batch_size = 100 data = mx.symbol.Variable('data') @@ -112,9 +60,14 @@ def Update(grad, weight): block = zip(grad_narrays, arg_narrays) - -train = MNISTIter("train", batch_size, False) -valid = MNISTIter("valid", batch_size, False) +train_dataiter = mx.io.MNISTIter( + image="/home/tianjun/data/mnist/train-images-idx3-ubyte", + label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=batch_size, shuffle=1, silent=0, seed=10) +val_dataiter = mx.io.MNISTIter( + image="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", + label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + batch_size=batch_size, shuffle=1, silent=0) def test_mnist(): acc_train = 0.0 @@ -124,12 +77,16 @@ def test_mnist(): print("Epoch %d" % i) train_acc = 0.0 val_acc = 0.0 - while train.Next(): - data, label = train.Get() + train_nbatch = 0 + val_nbatch = 0 + for data, label in train_dataiter: + data = data.numpy + label = label.numpy.flatten() inputs["data"].numpy[:] = data inputs["sm_label"].numpy[:] = label executor.forward() train_acc += CalAcc(out_narray.numpy, label) + train_nbatch += 1 
grad_narray.numpy[:] = out_narray.numpy executor.backward([grad_narray]) @@ -137,17 +94,17 @@ def test_mnist(): Update(grad, weight) # evaluate - while valid.Next(): - data, label = valid.Get() + for data, label in val_dataiter: + data = data.numpy + label = label.numpy.flatten() inputs["data"].numpy[:] = data executor.forward() val_acc += CalAcc(out_narray.numpy, label) - print("Train Acc: ", train_acc / train.nbatch) - print("Valid Acc: ", val_acc / valid.nbatch) - acc_train = train_acc / train.nbatch - acc_val = val_acc / valid.nbatch - train.BeforeFirst() - valid.BeforeFirst() + val_nbatch += 1 + print("Train Acc: ", train_acc / train_nbatch) + print("Valid Acc: ", val_acc / val_nbatch) + acc_train = train_acc / train_nbatch + acc_val = val_acc / val_nbatch assert(acc_train > 0.84) assert(acc_val > 0.96) diff --git a/tests/python/test_mlp.py b/tests/python/test_mlp.py index 4770c19b9136..46026b83a5ab 100644 --- a/tests/python/test_mlp.py +++ b/tests/python/test_mlp.py @@ -8,57 +8,6 @@ def CalAcc(out, label): pred = np.argmax(out, axis=1) return np.sum(pred == label) * 1.0 / out.shape[0] -def IgnorePython3(): - if sys.version_info[0] >= 3: - # TODO(tianjun): use IO instead of pickle - # Python3 pickle is not able to load data correctly - sys.exit(0) - - -# load data -class MNISTIter(object): - def __init__(self, which_set, batch_size=100, flatten=True): - if not os.path.exists('mnist.pkl.gz'): - os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") - f = gzip.open('mnist.pkl.gz', 'rb') - IgnorePython3() - train_set, valid_set, test_set = pickle.load(f) - f.close() - if which_set == 'train': - self.data = train_set[0] - self.label = np.asarray(train_set[1]) - elif which_set == 'valid': - self.data = valid_set[0] - self.label = np.asarray(valid_set[1]) - else: - self.data = test_set[0] - self.data = np.asarray(test_set[1]) - self.flatten = flatten - self.batch_size = batch_size - self.nbatch = self.data.shape[0] / batch_size - assert(self.data.shape[0] 
% batch_size == 0) # I am lazy - self.now_idx = -1 - def BeforeFirst(self): - self.now_idx = -1 - def Next(self): - self.now_idx += 1 - if self.now_idx == self.nbatch: - return False - return True - def Get(self): - if self.now_idx < 0: - raise Exception("Iterator is at head") - elif self.now_idx >= self.nbatch: - raise Exception("Iterator is at end") - start = self.now_idx * self.batch_size - end = (self.now_idx + 1) * self.batch_size - if self.flatten: - return (self.data[start:end, :], self.label[start:end]) - else: - return (self.data[start:end, :].reshape(batch_size, 1, 28, 28), - self.label[start:end]) - - # symbol net batch_size = 100 data = mx.symbol.Variable('data') @@ -100,10 +49,14 @@ def Update(grad, weight): block = zip(grad_narrays, arg_narrays) - - -train = MNISTIter("train", batch_size, True) -valid = MNISTIter("valid", batch_size, True) +train_dataiter = mx.io.MNISTIter( + image="/home/tianjun/data/mnist/train-images-idx3-ubyte", + label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) +val_dataiter = mx.io.MNISTIter( + image="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", + label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + batch_size=batch_size, shuffle=1, flat=1, silent=0) def test_mlp(): acc_train = 0. 
@@ -113,12 +66,16 @@ def test_mlp(): print("Epoch %d" % i) train_acc = 0.0 val_acc = 0.0 - while train.Next(): - data, label = train.Get() + train_nbatch = 0 + val_nbatch = 0 + for data, label in train_dataiter: + data = data.numpy + label = label.numpy.flatten() inputs["data"].numpy[:] = data inputs["sm_label"].numpy[:] = label executor.forward() train_acc += CalAcc(out_narray.numpy, label) + train_nbatch += 1 grad_narray.numpy[:] = out_narray.numpy executor.backward([grad_narray]) @@ -126,17 +83,17 @@ def test_mlp(): Update(grad, weight) # evaluate - while valid.Next(): - data, label = valid.Get() + for data, label in val_dataiter: + data = data.numpy + label = label.numpy.flatten() inputs["data"].numpy[:] = data executor.forward() val_acc += CalAcc(out_narray.numpy, label) - acc_train = train_acc / train.nbatch - acc_val = val_acc / valid.nbatch - print("Train Acc: ", train_acc / train.nbatch) - print("Valid Acc: ", val_acc / valid.nbatch) - train.BeforeFirst() - valid.BeforeFirst() + val_nbatch += 1 + acc_train = train_acc / train_nbatch + acc_val = val_acc / val_nbatch + print("Train Acc: ", train_acc / train_nbatch) + print("Valid Acc: ", val_acc / val_nbatch) assert(acc_train > 0.98) assert(acc_val > 0.97) From 32d10aedee10097efb66773f4ae09d1fdaddee92 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Thu, 27 Aug 2015 17:21:01 +0800 Subject: [PATCH 58/61] use local io in test --- .gitignore | 1 - doc/python/io.md | 12 ++++++++ src/io/iter_mnist.cc | 10 +++---- tests/python/get_data.py | 29 +++++++++++++++++++ tests/python/test_conv.py | 12 +++++--- tests/python/test_io.py | 59 +++++++++++++++++++++++++++++++++++++++ tests/python/test_mlp.py | 13 ++++++--- 7 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 doc/python/io.md create mode 100644 tests/python/get_data.py create mode 100644 tests/python/test_io.py diff --git a/.gitignore b/.gitignore index d62c63f403e9..549726650e43 100644 --- a/.gitignore +++ b/.gitignore @@ -55,4 +55,3 @@ Debug 
.dir-locals.el __pycache__ *.pkl -* \ No newline at end of file diff --git a/doc/python/io.md b/doc/python/io.md new file mode 100644 index 000000000000..7bff6a83e354 --- /dev/null +++ b/doc/python/io.md @@ -0,0 +1,12 @@ +Python IO API +=================== +Mxnet handles IO for you by implementing data iterators. +It is like an iterable class in python, you can traverse the data using a for loop. + + +IO API Reference +---------------------- +```eval_rst +.. automodule:: mxnet.io + :members: +``` diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index a6336e59afee..32014a75d00f 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -83,11 +83,11 @@ class MNISTIter: public IIterator { if (param_.silent == 0) { mshadow::Shape<4> s = batch_data_.shape_; if (param_.flat) { - LOG(INFO) << "MNISTIter: load " << (unsigned)img_.size(0) << " images, shuffle=" + LOG(INFO) << "MNISTIter: load " << (unsigned)img_.size(0) << " images, shuffle=" << param_.shuffle << ", shape=" << s[0] << "," << s[3]; } else { - LOG(INFO) << "MNISTIter: load " << (unsigned)img_.size(0) << " images, shuffle=" - << param_.shuffle << ", shape=" << s[0] << "," << s[1] << "," << s[2] << "," + LOG(INFO) << "MNISTIter: load " << (unsigned)img_.size(0) << " images, shuffle=" + << param_.shuffle << ", shape=" << s[0] << "," << s[1] << "," << s[2] << "," << s[3]; } } @@ -201,9 +201,7 @@ class MNISTIter: public IIterator { DMLC_REGISTER_PARAMETER(MNISTParam); MXNET_REGISTER_IO_ITER(MNISTIter, MNISTIter) - .describe("Create data iterator for MNIST hand-written digit number \ - recogonition dataset, which include 50000 training images and \ - 10000 testing images. 
All images are 28 * 28 gray-scaled.") + .describe("Create iterator for MNIST hand-written digit number recognition dataset.") .add_arguments(MNISTParam::__FIELDS__()); } // namespace io } // namespace mxnet diff --git a/tests/python/get_data.py b/tests/python/get_data.py new file mode 100644 index 000000000000..82d25d9072fb --- /dev/null +++ b/tests/python/get_data.py @@ -0,0 +1,29 @@ +# pylint: skip-file +import os, gzip +import pickle as pickle +import sys + +# download mnist.pkl.gz +def GetMNIST_pkl(): + if not os.path.isdir("data/"): + os.system("mkdir data/") + if not os.path.exists('data/mnist.pkl.gz'): + os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz -P data/") + +# download ubyte version of mnist and untar +def GetMNIST_ubyte(): + if not os.path.isdir("data/"): + os.system("mkdir data/") + if not os.path.exists('data/train-images-idx3-ubyte'): + os.system("wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz -P data/") + os.system("gunzip data/train-images-idx3-ubyte.gz") + if not os.path.exists('data/train-labels-idx1-ubyte'): + os.system("wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz -P data/") + os.system("gunzip data/train-labels-idx1-ubyte.gz") + if not os.path.exists('data/t10k-images-idx3-ubyte'): + os.system("wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz -P data/") + os.system("gunzip data/t10k-images-idx3-ubyte.gz") + if not os.path.exists('data/t10k-labels-idx1-ubyte'): + os.system("wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz -P data/") + os.system("gunzip data/t10k-labels-idx1-ubyte.gz") + diff --git a/tests/python/test_conv.py b/tests/python/test_conv.py index 4ba7963bb17d..4a8fb58d680f 100644 --- a/tests/python/test_conv.py +++ b/tests/python/test_conv.py @@ -3,6 +3,7 @@ import numpy as np import os, pickle, gzip import sys +import get_data def CalAcc(out, label): @@ -60,13 +61,16 @@ def Update(grad, weight): block = zip(grad_narrays, arg_narrays) +# check 
data +get_data.GetMNIST_ubyte() + train_dataiter = mx.io.MNISTIter( - image="/home/tianjun/data/mnist/train-images-idx3-ubyte", - label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + image="data/train-images-idx3-ubyte", + label="data/train-labels-idx1-ubyte", batch_size=batch_size, shuffle=1, silent=0, seed=10) val_dataiter = mx.io.MNISTIter( - image="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", - label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + image="data/t10k-images-idx3-ubyte", + label="data/t10k-labels-idx1-ubyte", batch_size=batch_size, shuffle=1, silent=0) def test_mnist(): diff --git a/tests/python/test_io.py b/tests/python/test_io.py new file mode 100644 index 000000000000..58d710fcc428 --- /dev/null +++ b/tests/python/test_io.py @@ -0,0 +1,59 @@ +# pylint: skip-file +import mxnet as mx +import numpy as np +import os, gzip +import pickle as pickle +import sys +import get_data + +# prepare data +get_data.GetMNIST_ubyte() + +batch_size = 100 +train_dataiter = mx.io.MNISTIter( + image="data/train-images-idx3-ubyte", + label="data/train-labels-idx1-ubyte", + batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) +val_dataiter = mx.io.MNISTIter( + image="data/t10k-images-idx3-ubyte", + label="data/t10k-labels-idx1-ubyte", + batch_size=batch_size, shuffle=0, flat=1, silent=0) + +def test_MNISTIter_loop(): + nbatch = 60000 / batch_size + batch_count = 0 + for data, label in train_dataiter: + batch_count += 1 + assert(nbatch == batch_count) + batch_count = 0 + while train_dataiter.iter_next(): + batch_count += 1 + assert(nbatch == batch_count) + +''' +def test_MNISTIter_value(): + imgcount = [0 for i in range(10)] + val_dataiter.reset() + for data, label in val_dataiter: + label = label.numpy.flatten() + for i in range(label.shape[0]): + imgcount[int(label[i])] += 1 + for i in range(10): + print imgcount[i] + for i in range(10): + assert(imgcount[i] == 1000) +''' + +def test_MNISTIter_reset(): + train_dataiter.reset() + 
train_dataiter.iter_next() + label_0 = train_dataiter.getlabel().numpy.flatten() + train_dataiter.iter_next() + train_dataiter.iter_next() + train_dataiter.iter_next() + train_dataiter.iter_next() + train_dataiter.reset() + train_dataiter.iter_next() + label_1 = train_dataiter.getlabel().numpy.flatten() + assert(sum(label_0 - label_1) == 0) + diff --git a/tests/python/test_mlp.py b/tests/python/test_mlp.py index 46026b83a5ab..568bf41530ef 100644 --- a/tests/python/test_mlp.py +++ b/tests/python/test_mlp.py @@ -4,6 +4,8 @@ import os, gzip import pickle as pickle import sys +import get_data + def CalAcc(out, label): pred = np.argmax(out, axis=1) return np.sum(pred == label) * 1.0 / out.shape[0] @@ -49,13 +51,16 @@ def Update(grad, weight): block = zip(grad_narrays, arg_narrays) +#check data +get_data.GetMNIST_ubyte() + train_dataiter = mx.io.MNISTIter( - image="/home/tianjun/data/mnist/train-images-idx3-ubyte", - label="/home/tianjun/data/mnist/train-labels-idx1-ubyte", + image="data/train-images-idx3-ubyte", + label="data/train-labels-idx1-ubyte", batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) val_dataiter = mx.io.MNISTIter( - image="/home/tianjun/data/mnist/t10k-images-idx3-ubyte", - label="/home/tianjun/data/mnist/t10k-labels-idx1-ubyte", + image="data/t10k-images-idx3-ubyte", + label="data/t10k-labels-idx1-ubyte", batch_size=batch_size, shuffle=1, flat=1, silent=0) def test_mlp(): From f1b976424598d874406621f5dc052c77ed9af6b6 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Thu, 27 Aug 2015 17:57:35 +0800 Subject: [PATCH 59/61] meet python3 strict --- python/mxnet/io.py | 3 ++ test/io_mnist_test.cc | 96 --------------------------------------- tests/python/test_conv.py | 2 +- 3 files changed, 4 insertions(+), 97 deletions(-) delete mode 100644 test/io_mnist_test.cc diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 633fa06bb6d3..21ceb2786b5e 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -54,6 +54,9 @@ def next(self): self.reset() 
raise StopIteration + # make it work for both python2 and 3 + __next__ = next + def iter_next(self): """iterate to next data with return value diff --git a/test/io_mnist_test.cc b/test/io_mnist_test.cc deleted file mode 100644 index 2bfba24a507a..000000000000 --- a/test/io_mnist_test.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2015 by Contributors -// IO test code - -#include -#include -#include -#include -#include -#include "mxnet/io.h" -#include "../src/io/iter_mnist-inl.h" - -using namespace std; -using namespace mxnet; -using namespace dmlc; - -void InitIter(IIterator* itr, - const std::vector< std::pair< std::string, std::string> > &defcfg) { - for (size_t i = 0; i < defcfg.size(); ++i) { - itr->SetParam(defcfg[i].first.c_str(), defcfg[i].second.c_str()); - } - itr->Init(); -} - -IIterator* CreateIterators( - const std::vector< std::pair< std::string, std::string> >& cfg) { - IIterator* data_itr = NULL; - int flag = 0; - std::string evname; - std::vector< std::pair< std::string, std::string> > itcfg; - std::vector< std::pair< std::string, std::string> > defcfg; - for (size_t i = 0; i < cfg.size(); ++i) { - const char *name = cfg[i].first.c_str(); - const char *val = cfg[i].second.c_str(); - if (!strcmp(name, "data")) { - flag = 1; continue; - } - if (!strcmp(name, "eval")) { - flag = 2; continue; - } - if (!strcmp(name, "pred")) { - flag = 3; continue; - } - if (!strcmp(name, "iterend") && !strcmp(val, "true")) { - if (flag == 1) { - data_itr = mxnet::CreateIterator(itcfg); - } - flag = 0; itcfg.clear(); - } - if (flag == 0) { - defcfg.push_back(cfg[i]); - } else { - itcfg.push_back(cfg[i]); - } - } - if (data_itr != NULL) { - InitIter(data_itr, defcfg); - } - return data_itr; -} - -/*! 
- * Usage: ./io_mnist_test /path/to/io_config/file - * Example - * data = train - * iter = mnist - * path_img = "./data/mnist/train-images-idx3-ubyte" - * path_label = "./data/mnist/train-labels-idx1-ubyte" - * shuffle = 1 - * iterend = true - * input_shape = 1,1,784 - * batch_size = 100 - * - */ - -int main(int argc, char** argv) { - std::ifstream ifs(argv[1], std::ifstream::in); - std::vector< std::pair< std::string, std::string> > itcfg; - Config cfg(ifs); - for (Config::ConfigIterator iter = cfg.begin(); iter != cfg.end(); ++iter) { - Config::ConfigEntry ent = *iter; - itcfg.push_back(std::make_pair(ent.first, ent.second)); - } - // Get the data and init - IIterator* data_itr = CreateIterators(itcfg); - data_itr->BeforeFirst(); - int batch_dir = 0; - while (data_itr->Next()) { - std::cout << "Label of Batch " << batch_dir++ << std::endl; - // print label - DataBatch db = data_itr->Value(); - mshadow::Tensor label = db.data[1].get(); - for (size_t i = 0; i < label.shape_.shape_[0]; i++) - std::cout << label.dptr_[i] << " "; - std::cout << "\n"; - } -} diff --git a/tests/python/test_conv.py b/tests/python/test_conv.py index 4a8fb58d680f..1e0823075e48 100644 --- a/tests/python/test_conv.py +++ b/tests/python/test_conv.py @@ -76,7 +76,7 @@ def Update(grad, weight): def test_mnist(): acc_train = 0.0 acc_val = 0.0 - for i in xrange(epoch): + for i in range(epoch): # train print("Epoch %d" % i) train_acc = 0.0 From 982f18d1a0f8018a59fcd2a0ab0f6cfb50ff23f8 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Thu, 27 Aug 2015 18:54:18 +0800 Subject: [PATCH 60/61] can not train in python3 --- tests/python/test_conv.py | 4 +++- tests/python/test_io.py | 14 -------------- tests/python/test_mlp.py | 3 +++ 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/tests/python/test_conv.py b/tests/python/test_conv.py index 1e0823075e48..51c89fd2cb07 100644 --- a/tests/python/test_conv.py +++ b/tests/python/test_conv.py @@ -5,11 +5,13 @@ import sys import get_data - def 
CalAcc(out, label): pred = np.argmax(out, axis=1) return np.sum(pred == label) * 1.0 / out.shape[0] +if sys.version_info[0] >= 3: + sys.exit(0) + # symbol net batch_size = 100 data = mx.symbol.Variable('data') diff --git a/tests/python/test_io.py b/tests/python/test_io.py index 58d710fcc428..2d4b8d2d26f5 100644 --- a/tests/python/test_io.py +++ b/tests/python/test_io.py @@ -30,20 +30,6 @@ def test_MNISTIter_loop(): batch_count += 1 assert(nbatch == batch_count) -''' -def test_MNISTIter_value(): - imgcount = [0 for i in range(10)] - val_dataiter.reset() - for data, label in val_dataiter: - label = label.numpy.flatten() - for i in range(label.shape[0]): - imgcount[int(label[i])] += 1 - for i in range(10): - print imgcount[i] - for i in range(10): - assert(imgcount[i] == 1000) -''' - def test_MNISTIter_reset(): train_dataiter.reset() train_dataiter.iter_next() diff --git a/tests/python/test_mlp.py b/tests/python/test_mlp.py index 568bf41530ef..b952734547f6 100644 --- a/tests/python/test_mlp.py +++ b/tests/python/test_mlp.py @@ -10,6 +10,9 @@ def CalAcc(out, label): pred = np.argmax(out, axis=1) return np.sum(pred == label) * 1.0 / out.shape[0] +if sys.version_info[0] >= 3: + sys.exit(0) + # symbol net batch_size = 100 data = mx.symbol.Variable('data') From 25e3363174586515853386641d6b82a8656feb32 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Fri, 28 Aug 2015 13:37:30 +0800 Subject: [PATCH 61/61] list zip for py3, no reset in iter --- python/mxnet/io.py | 1 - src/io/iter_mnist.cc | 8 ++++---- tests/python/test_conv.py | 11 +++++------ tests/python/test_io.py | 4 ---- tests/python/test_mlp.py | 11 +++++------ 5 files changed, 14 insertions(+), 21 deletions(-) diff --git a/python/mxnet/io.py b/python/mxnet/io.py index 21ceb2786b5e..58dbe6e6f9a3 100644 --- a/python/mxnet/io.py +++ b/python/mxnet/io.py @@ -51,7 +51,6 @@ def next(self): if next_res.value: return self.getdata(), self.getlabel() else: - self.reset() raise StopIteration # make it work for both python2 and 3 
diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 32014a75d00f..93195061b278 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -81,14 +81,14 @@ class MNISTIter: public IIterator { out_.batch_size = param_.batch_size; if (param_.shuffle) this->Shuffle(); if (param_.silent == 0) { - mshadow::Shape<4> s = batch_data_.shape_; + mshadow::TShape s; + s = batch_data_.shape_; if (param_.flat) { LOG(INFO) << "MNISTIter: load " << (unsigned)img_.size(0) << " images, shuffle=" - << param_.shuffle << ", shape=" << s[0] << "," << s[3]; + << param_.shuffle << ", shape=" << s.FlatTo2D(); } else { LOG(INFO) << "MNISTIter: load " << (unsigned)img_.size(0) << " images, shuffle=" - << param_.shuffle << ", shape=" << s[0] << "," << s[1] << "," << s[2] << "," - << s[3]; + << param_.shuffle << ", shape=" << s; } } } diff --git a/tests/python/test_conv.py b/tests/python/test_conv.py index 51c89fd2cb07..0604476d4bb5 100644 --- a/tests/python/test_conv.py +++ b/tests/python/test_conv.py @@ -9,9 +9,6 @@ def CalAcc(out, label): pred = np.argmax(out, axis=1) return np.sum(pred == label) * 1.0 / out.shape[0] -if sys.version_info[0] >= 3: - sys.exit(0) - # symbol net batch_size = 100 data = mx.symbol.Variable('data') @@ -61,7 +58,7 @@ def CalAcc(out, label): def Update(grad, weight): weight.numpy[:] -= lr * grad.numpy[:] / batch_size -block = zip(grad_narrays, arg_narrays) +block = list(zip(grad_narrays, arg_narrays)) # check data get_data.GetMNIST_ubyte() @@ -69,11 +66,11 @@ def Update(grad, weight): train_dataiter = mx.io.MNISTIter( image="data/train-images-idx3-ubyte", label="data/train-labels-idx1-ubyte", - batch_size=batch_size, shuffle=1, silent=0, seed=10) + batch_size=batch_size, shuffle=True, silent=False, seed=10) val_dataiter = mx.io.MNISTIter( image="data/t10k-images-idx3-ubyte", label="data/t10k-labels-idx1-ubyte", - batch_size=batch_size, shuffle=1, silent=0) + batch_size=batch_size, shuffle=True, silent=False) def test_mnist(): acc_train = 0.0 @@ -111,6 
+108,8 @@ def test_mnist(): print("Valid Acc: ", val_acc / val_nbatch) acc_train = train_acc / train_nbatch acc_val = val_acc / val_nbatch + train_dataiter.reset() + val_dataiter.reset() assert(acc_train > 0.84) assert(acc_val > 0.96) diff --git a/tests/python/test_io.py b/tests/python/test_io.py index 2d4b8d2d26f5..dfeb3f67c293 100644 --- a/tests/python/test_io.py +++ b/tests/python/test_io.py @@ -25,10 +25,6 @@ def test_MNISTIter_loop(): for data, label in train_dataiter: batch_count += 1 assert(nbatch == batch_count) - batch_count = 0 - while train_dataiter.iter_next(): - batch_count += 1 - assert(nbatch == batch_count) def test_MNISTIter_reset(): train_dataiter.reset() diff --git a/tests/python/test_mlp.py b/tests/python/test_mlp.py index b952734547f6..8a84d50536c3 100644 --- a/tests/python/test_mlp.py +++ b/tests/python/test_mlp.py @@ -10,9 +10,6 @@ def CalAcc(out, label): pred = np.argmax(out, axis=1) return np.sum(pred == label) * 1.0 / out.shape[0] -if sys.version_info[0] >= 3: - sys.exit(0) - # symbol net batch_size = 100 data = mx.symbol.Variable('data') @@ -52,7 +49,7 @@ def CalAcc(out, label): def Update(grad, weight): weight.numpy[:] -= lr * grad.numpy[:] / batch_size -block = zip(grad_narrays, arg_narrays) +block = list(zip(grad_narrays, arg_narrays)) #check data get_data.GetMNIST_ubyte() @@ -60,11 +57,11 @@ def Update(grad, weight): train_dataiter = mx.io.MNISTIter( image="data/train-images-idx3-ubyte", label="data/train-labels-idx1-ubyte", - batch_size=batch_size, shuffle=1, flat=1, silent=0, seed=10) + batch_size=batch_size, shuffle=True, flat=True, silent=False, seed=10) val_dataiter = mx.io.MNISTIter( image="data/t10k-images-idx3-ubyte", label="data/t10k-labels-idx1-ubyte", - batch_size=batch_size, shuffle=1, flat=1, silent=0) + batch_size=batch_size, shuffle=True, flat=True, silent=False) def test_mlp(): acc_train = 0. 
@@ -102,6 +99,8 @@ def test_mlp(): acc_val = val_acc / val_nbatch print("Train Acc: ", train_acc / train_nbatch) print("Valid Acc: ", val_acc / val_nbatch) + train_dataiter.reset() + val_dataiter.reset() assert(acc_train > 0.98) assert(acc_val > 0.97)