From 9f0f7cdc8a72185b5b0a0866821a33700ed2ed51 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 26 Sep 2015 18:13:02 -0700 Subject: [PATCH 01/13] Update .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a0f86a2bfdf8..dd58af27ff3c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ sudo: false # Enabling test on Linux and OS X os: - - linux + - osx # Use Build Matrix to do lint and build seperately env: From f92c496d23c73a33cc537b876b9f42aa49d32056 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 26 Sep 2015 20:12:28 -0700 Subject: [PATCH 02/13] [OSX] Make OSX Travis work --- .travis.yml | 1 + scripts/travis_osx_install.sh | 5 ++--- scripts/travis_script.sh | 15 +++++++++++---- src/io/image_augmenter.h | 11 +---------- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/.travis.yml b/.travis.yml index dd58af27ff3c..88c837798c01 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,7 @@ sudo: false # Enabling test on Linux and OS X os: + - linux - osx # Use Build Matrix to do lint and build seperately diff --git a/scripts/travis_osx_install.sh b/scripts/travis_osx_install.sh index f0a0be48a24b..04929633ee5a 100755 --- a/scripts/travis_osx_install.sh +++ b/scripts/travis_osx_install.sh @@ -21,12 +21,11 @@ conda update -q conda # Useful for debugging any issues with conda conda info -a -if [ ${TASK} == "python-package3" ]; then +if [ ${TASK} == "package3" ]; then conda create -n myenv python=3.4 - alias python3=python else conda create -n myenv python=2.7 fi source activate myenv conda install numpy scipy matplotlib nose -python -m pip install graphviz +python -m pip install graphviz \ No newline at end of file diff --git a/scripts/travis_script.sh b/scripts/travis_script.sh index dd2a62c8b37b..a56b73c469b9 100755 --- a/scripts/travis_script.sh +++ b/scripts/travis_script.sh @@ -19,12 +19,15 @@ fi # prereqs for things that need make cp make/config.mk config.mk +export 
NOSE3=nosetests3 +export PYTHON3=python3 if [ ${TRAVIS_OS_NAME} == "osx" ]; then source scripts/travis_osx_install.sh echo "USE_BLAS=apple" >> config.mk echo "USE_OPENMP=0" >> config.mk - alias nosetests='python -m noise' - alias nosetests3='python -m noise' + alias nosetests='python -m nose' + export NOSE3='python -m nose' + export PYTHON3=python else echo "USE_BLAS=blas" >> config.mk echo "USE_CUDNN=0" >> config.mk @@ -46,6 +49,7 @@ fi if [ ${TASK} == "python" ]; then echo "USE_CUDA=0" >> config.mk make all || exit -1 + python --version export MXNET_ENGINE_TYPE=ThreadedEngine nosetests tests/python/unittest || exit -1 nosetests tests/python/train || exit -1 @@ -55,14 +59,16 @@ if [ ${TASK} == "python3" ]; then echo "USE_CUDA=0" >> config.mk make all || exit -1 export MXNET_ENGINE_TYPE=ThreadedEngine - nosetests3 tests/python/unittest || exit -1 - nosetests3 tests/python/train || exit -1 + ${PYTHON3} --version + ${NOSE3} tests/python/unittest || exit -1 + ${NOSE3} tests/python/train || exit -1 fi if [ ${TASK} == "python_naive" ]; then echo "USE_CUDA=0" >> config.mk make all || exit -1 export MXNET_ENGINE_TYPE=NaiveEngine + python --version nosetests tests/python/unittest || exit -1 nosetests tests/python/train || exit -1 fi @@ -71,6 +77,7 @@ if [ ${TASK} == "python_perdev" ]; then echo "USE_CUDA=0" >> config.mk make all || exit -1 export MXNET_ENGINE_TYPE=ThreadedEnginePerDevice + python --version nosetests tests/python/unittest || exit -1 nosetests tests/python/train || exit -1 fi diff --git a/src/io/image_augmenter.h b/src/io/image_augmenter.h index a9296afff3be..cd50c5e10b08 100644 --- a/src/io/image_augmenter.h +++ b/src/io/image_augmenter.h @@ -90,8 +90,7 @@ struct ImageAugmentParam : public dmlc::Parameter { class ImageAugmenter { public: // contructor - ImageAugmenter(void) - : tmpres_(false) { + ImageAugmenter(void) { #if MXNET_USE_OPENCV rotateM_ = cv::Mat(2, 3, CV_32F); #endif @@ -211,20 +210,12 @@ class ImageAugmenter { #endif private: - // temp input 
space - mshadow::TensorContainer tmpres_; - // mean image - mshadow::TensorContainer meanimg_; - /*! \brief temp space */ - mshadow::TensorContainer img_; #if MXNET_USE_OPENCV // temporal space cv::Mat temp_; // rotation param cv::Mat rotateM_; - // whether the mean file is ready #endif - bool meanfile_ready_; // parameters ImageAugmentParam param_; /*! \brief list of possible rotate angle */ From 11de65de849054f19462eef30991934b896ca2ad Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 26 Sep 2015 22:42:20 -0700 Subject: [PATCH 03/13] [DOC] add program model discussion --- README.md | 15 +- dmlc-core | 2 +- doc/img/comp_grad_graph.png | Bin 0 -> 14555 bytes doc/img/comp_graph.png | Bin 0 -> 7954 bytes doc/img/comp_graph_folded.png | Bin 0 -> 17077 bytes doc/index.md | 1 + doc/program_model.md | 412 ++++++++++++++++++++++++++++++++++ 7 files changed, 421 insertions(+), 9 deletions(-) create mode 100644 doc/img/comp_grad_graph.png create mode 100644 doc/img/comp_graph.png create mode 100644 doc/img/comp_graph_folded.png create mode 100644 doc/program_model.md diff --git a/README.md b/README.md index b7afa97dd8c4..55e5b9ac36f4 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,18 @@ [![Documentation Status](https://readthedocs.org/projects/mxnet/badge/?version=latest)](http://mxnet.readthedocs.org/en/latest/) [![Hex.pm](https://img.shields.io/hexpm/l/plug.svg)]() -MXNet is a deep learning framework designed for both *efficiency* and *flexibility*. It -aims for people +MXNet is an open source deep learning framework designed for both *efficiency* and *flexibility*. +It allows you to mix the [flavors]((http://mxnet.readthedocs.org/en/program_model.html) of +deep learning programs together to maximize the efficiency and your productivity. -- Who want to apply deep learning for applications. One can use only several lines of codes - to create and train a neural network with high efficiency. Check our - [examples](example) for more details. 
-- Who want to use it for research on deep learning. MXNet provides flexible - programming interface for rapid prototyping. For example, check our - [tutorials for Python](http://mxnet.readthedocs.org/en/latest/python/tutorial.html) +What's New +---------- +* [Note on Programming Models for Deep Learning](http://mxnet.readthedocs.org/en/program_model.html) Contents -------- * [Documentation](http://mxnet.readthedocs.org/en/latest/) +* [Code Examples](example) * [Build Instruction](doc/build.md) * [Features](#features) * [License](#license) diff --git a/dmlc-core b/dmlc-core index c782236fae87..5aeced5b84d1 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit c782236fae87c6177adcea7247bd945e629d8be9 +Subproject commit 5aeced5b84d14e7ac50e82445eaeab1587313221 diff --git a/doc/img/comp_grad_graph.png b/doc/img/comp_grad_graph.png new file mode 100644 index 0000000000000000000000000000000000000000..7b19bb45f703fd5b5b5d1623649606a72b99c742 GIT binary patch literal 14555 zcma*Oby$>96E{qUfTTz$EhR`vNP~1scMFnBNh4j--O}B;0!w#G_tGf2uyp#}_&m?` zeSdt{d%b@Ud+*$HX6DQ}b7tmu!c~-{ah{PqLqI^lk(H59ML2NDT*N=)W%@lo1g-pF&$)doDmQRT%LXqp%#?x2nc)*vJztI z?uJKM%C@xgS-gcgjjdDcJId3+=nKu!cWV zSY}PDKhji_jdcA5#StQBs!IsQ;6s;!`y;j>j!{fW?BbR2qPyl$A(7D@tfMZ38i)U3 zhhVHv-c{ZT5l8funbv;Ss9^pkLe+9pT&B&e**0<4cI8$5&RY(HIw)2`%!5!IgBmjs zP9KCPPJp(f2t1|39?rlM$B-pSgvJig0PEc}YC@!%3Y+pL3D({~o9TLk2NJYTb%W+8!R{Cv>nf)^2Oxt#^ zQ!HXwy?sZh!4y1go%r$<@*~Bn)F(Y3`&{iogP#pQ8{8eYnJF9u=~pd2l=~Vzk8>^0 zFMF{%Ze>6!RPSy~>8A#zTLiT4jrat&*Le9(Kef6^S7QvSG@_^v<-c_^8cbYO$9Cp9 zXmmQJNrt4pO)f_toBpBku<4+{e-WU`J(!%~s*$Q`_vvnCNl?rqL0uFOlj*2`JpO!a zoRx^VCR#(`dEUMw#93Cd&U7lctn~BG7?5Oh^#Y66QrYp?3eBUxwsgeuZ;naazTgUH z_3FoD@l^;``~R%Q*)j%)5M!qg5|;}YO3dcNj(@ZrWfY(j6&muH-STLG_gF9U$+`-e z?g%PYh5g#OB6g#HQ{Ph(I)Xp$Nd9qMva?z3y1=EQV*=EaKzU!dNY7vnHS;WE;K{b^Y86azH3m!RAj#4fuTw`fqO&vvdBmc_8`fhg zWU>Ee8=SHD-N@XyiQx{zZPtkA-g$Er_5#mkp=ll{Q_nGvg)y8I1YJ9D|3{7)t(07) z=k{q&M$|gIZZ;K`J7>1`-ATW8S-5gUn}Q8o%UUqT{KX$Y~Hj_j4%|cqrm5* 
zpMN_CXK@r&p3D4hFl5@^y#4I9c+?VREPU^yB2zN`o2)T1BT=fbn zp&MJ_sa`p`6%A|wNk(Zq=E0x4DyZuP$sblaEuyF7KxCP7AcTIr9)XjA?t)bLE(WQC z;8CjQX|s9p_<00w0@iYkB^iOgVz(>P$12G##U2C1b46#dJURhF?_~-Nt2wEOk z%v9n!JN!rIU+9A8#fYryzOa|-35}Du)`usJ&}M|~ydLn1{Zf(2qbgt-C=y0){8ci9xkxC^^U*6!w!=F8d1Rgv)m@KKLq0<&ABz zdiXhG1@AjbKR_OgU1iig^REZzp3j<3m22K;s>6tC;+y#>b;mca2YLhEv=Q9xL(2cV zo|F)v+pJiqAYQjZtvWSm!pAa$2THfZ5=fk_WU#XW%5X*j(y#!cd#oE+XKJeo(d|m% z0I~Xw4jcT3aq;g_Dv^8*(XyVNL=--Gc=Tl|Mk22)YN8vGLlXFd$(Yk!-_S@auX32p z)!SO>DBocg%@_?+4e)Bk9~~5AQsa6$*q4}a(f`=X?+3&nZRALMU_?O|+UCtopK(`^ zeZni+G{gSa30Y+Mx}EHP)~vcjCOSN}hW2CFTVR+II@svFYM*PvK@fY0ud-9-ufs|E zxnX8rtQS?bRVr56itqK{Xd9G-{&0G$0AT8WY*%s#mVD)OQ7&f|u4@f{EVP~&FRY@b z=;a4QzSuo6k0;->0qcyvaJ3%0MB(g?8EKol`t%g@IE8=!I7pJ2nd!Z8<`hr25nwC&Cgorjl)Iq6wJl_phC5H1i#Yte;6lKOrXj--Hp_*_D#l|_EK zpKd#6_iUY%7N0o5(kzd^-HgPQVOfTB^2)t0z}dNAR@8`g%ZDkS=1{F-z_S$ z={tC2`tEY!-P~fa8Q)xx^ZpR@$m5Z~zznV3UbRc4%VzqcZ8|ZClC2O;L&rZpK2Bvk z$x~r_+Hi-w;r_UjQhU6yk-PMYnhvEu4s3<(u&x6UoGcqsEMNkOd(+%}-Va8{a|YSl ze!2A7XNpJ_62>d~ZRClmXUxV&y}=OzLAQ5~W8xNX*ODwZRH=7Sg$7-t;G21X}Bj8cVc{e#z_c1GIGEMF^;Q^8Gv$4tVgJ5sVoDGh$i5| z&`FqA924(y*@a+1G|bhEq4-#G5jeHCElF@L0?&&(9dugaI4nxc?X{U<+AFf=t0Pd z7M3gWcx{F;{7hefR)O>A+c~Wp?8eJ7D%}m<|J%7{mu9 zPU&FOI&S2W4>+OGF#j1IH?(7?#VS@-qH@Ka86bKpi8ipGPfp@vvvTEpPz-2-Kq`J* zu-ka~_^b+6l&dpuj5$h&;fSapce;k$&0ta!c11um%F>(DR(yKFMjm>(5X{M4Jl^0* z_V+I3r0%ia!fcQ2#ewX)G*-4IHa7CCGt*6i;4gZjb_kTKKF7^ekI6OUYvnw07-{b* zw(Yj;a|jk}1=<1S`;qX=@TFidSVYLz9$hi>$e7E^OXDvkFGDCuS50LrzRX=NEv1jp zF}Jq1YFnJaKl?r=3!Y6?>K&U+VNi>au)=;6R&uzO^H+CJZiaz?9TeM{q^+$D2d6b? 
zGED!1Q8#$fnsAF2PQ21+=QoTpHT`x5#BOZ!NE6Hat=w0Zo@EXb#PlfUi5;q!rN zk-vh(;~ns4kHn3b;xb<%W5IE0yK>rUS-|mbT!sYaBe}`>{(jl+wiuq?7AE5+PHm@vB zkIDJ&ZV`NzN*8YxD<8i}+^f`zzn?NdKsk{CVmH;;MZuW} zQqf04%>2&_NnRwC(izN=7J)USuU@@+^Gf+59gl$Z9o&^1du||X{No?MQVb)Di=+XP zigF^}mfRxjjsv@ue?!2z4Me?hV%=Ys25I^1`TX9#eJf}hq<|{z(W=~I+WqIHTMC4d z5hjbss_{6l+^79^o_Ot@H*(B=9rt<$Uq2= z+ens-l=1SZ5F#VYuhXRbCMoGaE5|%+G0!?>prie4MQ_aqbBB{gx3MyIa=DpQiUG~+ zK6P}dskymSm?}sjpbg^ndeP8qFP6WZ2}adv&8UIz&?>#Gggf|HISVqy^78Vsj_{8& z(V|vFlFLKsh`25Fe2lL6A&r691iwr!v!Kt61`P{fV5t6Q2lVybLQNcTC|)Yz9W`+= zPw(Q_$agw}t2TWlc8X-G8nXfB|}GKva)X zQHrc$7?^~^0$GDmxjBXrkv@V2r>{bIy8pN<0Yv87P|^FS+ce!UJOZ*rH-3NVHEqwK zJQLrTql;?!NtQ2SE^-WrZ=jCxSF`0>r$HAy;@u%|B?iJ6h|ViQE@F?5%LJ}iO%E(V zv6$i>#R(X&?H&o+#xhNKYmJk6 zugBfmozHd^FG#8BycuCLNLzX87e}DfyJVd>?cuH%CgA5%YZd14maNRtvP#`n@&ruSw*z3(&=Tt~B(+7{74W`X;r(&%e#X zb3d~0Q>j3$c27>&JL(`#N5gEq9sI}GHt%^I5q}8t^Q*F*>$p2MDL4pI#=9KOa+l;& zbDr%KrB1KvZB3TGH0O%0kN&z`T)cFca|_-#nzN%kfT46gVqj$LDk;e>53i_J^3JB+ ziRD1l#wWgAa?MhD_?$8v7`Z8%1g_`a@Fq8ee>%H)5q(1qWlEAOFE8ImP4kctobyxL z`y$g19GE{Dl01*eN~T2jc~+}m%NK*QII+HJN7M*P(_JUvet}cGlIBnqZ+=Kv_?`V)0+VAcHO~@qhTRc$*E|_~34KvjaBrIrW8{ zihtdz6J+M?2yvsEK|8Pki7scdXGR8Z%cS=>a*jkYfL^Cr2&nOVa7Kd9f%%Q>8q3n# z?DWeJ|C^8!H~)VUQYx5ITaE@!-A<8#P@aa~jPUC(v^fw%Fdy zt$G^PJoE(ZYQAlKC2bp9dB2G}6t~?OM4G$0pc4;WY7Q1hrKl?#HZ(Zqv`?<2Y+-|; z`qY7h!jfD3&mMSPK)b}JxV`+kE5b2;3c^3WM(@vN>z9zXt~=m8ISXme@G)n9aRYZ6 zk%C?Z&cEc9;y=mj%+5Nsn0*|_FI_tW!#3Ccxg_fZQ_$UOo4Gix*wf_)!(i6?`iiV+X7HMX__#sbA$J_(uU9SEVKAsI`{zETcD^o1vTu33L0Wf&kIVbcN`Jf={3@BCePi06Qz|5$$m?UmL> zh%hm!=>Ar1Rom7UW#nJTkoHVLk1H^LBma3+x3jY|p_b!gVTx=0V!g;+{lD|7;LV|B zEeUZyGe)nE_ZG>50I7^+RcnjBIZ(uz`&_Xg`YvS)P7$lUK=8SoW6g|5KLgVR1xU}c zNxp64AXZfEI*LOKuKR(_JClQKVaLC(rl#m*R`3B|h~Ubh3Y$|hwaLOy#r^BoM%7Yx zHtcLckGht+0OYXFTi~!(>%O$(A6)xg^K$o$`Z7$QY~cWMstdW~m|YQ?)d76J zk?Vo&u+eOJYYlqoOH%XA7;JoJ?0C-xra%eKhe#s7oxfM83fHKi(HxMP*_FBelJc-` zU8=5HjhTiP@6^@6jo`7Rf?;E0b9Z-ltULQ|147L9TKFQdZ;XOc8^7F))}(Juh!%%i 
z06h|d%B?5&*X-Y*{V#_f`L_6$mly+HBa+%3Lv3Sl#y&e2-mZrT4wGLGJX;~e!4qXKcQ=WxJ9jF zyK%11x2+ptgfy(jospplX978b^Qko=S|%DAjip8P4DM)OTWBAYl&X!Dd!&FHJngD| zS82>BacxmDOj$^Hrc}FGa<%=%b)-IxQ~6%^P$s9OJpCz~-$P2th>X8LdZ+LaYB36u zEB>k9*G@h<7K8qh%~Z|gO*SR$i|1a9({a;&Va^srF(Ct2dommH=5)DljG1c%G~gCX zi*RmbENaoN%{^uZ2D0fI2;Y7{P5f<*1E8=Lt}>v$f5MmbHy*P!*TatxdXO z2eseqZfLnCIM&3DM;T}Ad#NU4IPxbfGoFVJu9*8aA8zD5F$ z5?3<&?kDSr0rj!hW1w$=(@t(~T5Ufx9Pn7+{qko5fKRR6L9b56W4@i!LLgV?)5L^# z?uSBW*gU}?t!)Sz)cc2d&^b58W`ybwKxg?RD=8^SJV*1J#y!1#wN&d_TV3SgLe{{M)t= zYxf7F&Q?g*?XB+Pt?5Y+H!#bpIm^j%jbi7F^eh@4aU-&-zXp5UL34F9J}o( zLh`W;&cFrfat7Xi(=1syDGq8g#!P8*HU7M5Z(x=!Oi#0*l&yy33n3Y^c*ujVfOFXG zC9h`U-N%XRLUM`{O=mK70!s%ArzY-1|L(Wd5SkyvmLnMf&5s7mNheBgG8!2dw?2-( zwg@(i3j0@KDSN803@Vm{s*O!C0u>en5_za-2!&YZ-^&SKOZ>AzbWMC`=d$XP_4g%o zkE7SNHYM%)t*$uIqPR8aL3TBDb*93?+Nk?|cW&8U$A8!!yEiRT1~$}`OU^N$8zvWYKS3xKf4oh~uJB#W?*=AUk_FPus~I-hL}ezK>Gy~TqC zGuQBsF(OdcLEJ0n!NjGWH^VmH~% z)SU+I-Z{prliz=@2SoZiV!_v5c6L!mada4;2K9d3g3c32Xq2@Rsta%;J&-TL1Lb{y z#XAr?FDOg=6Y99JmeJP0P2~C0;_L>3!a8M3?~sXsDdPf1t9ky;cn@Q3Z7l{I)Di_3 zHz}U8b({Su%sOA~yi}5uLd^VFRF50dU|3V0K^LbZulI37vsB%oYZI53_roB&JtXXk&X@f;pEj$u42FNC`a6Up3EM*c}kx#N+7gSnT;r@MyJ{ zm8M`8KMw4vC^O|(WkF|$^Mc5V*=?A)uz5%#H64PQxi?!;-++mnd3&l#^xfm3 z>Yz`m-0;^X%(j`-Zf49%g(2!zkqK5J>$XJp?A?&u;xjK-RdJUhHW1>4y50f;mvpsi3$xp-LfB2MfObiD$vq-I;t!wSyOv|cyy+E zQDv!@==Ry^=qk0<8$&p;1BZ;Xd!eeV8*Z9FCfZjg7bTm%8^gg$V65MQ239uL&XK3a zw#*Fpgwh_!`k1>O8kQ&Bz&1gc|Fy>4-Rqs!hpL0CsbWa+N9(CMmGb0MXO zvxkiI_3f>vEq{0Zdf0T1Qf$#6KTvbp^P$-1-Vu7?@M&vB$#ad}diHp$3j;GldqmG~ zsDSnrO?EKPwzEO6AyZcNl+uUoozc8WzxTYqo~DB~GG8^2rv zCta`^KJo>0U=$)}Y>9`!e+a88D@tGyR!OJr;7Q_CD*}|vAU`ZwvNW+n5IaAaQXtuj zL^l>0i6t^-QUW>$L6UHC%w(`%jn~^pUXs_r1t&FrVJO1_#JwVRI9`lfUD}289CW8} zVkJ*rlBf|#*_Pju&@jefho@q9FgzOdH;0m5PdlkvabdnzbU{qwpdS0K#wrx5^y|%l zl}bK9f_18(*Yo%^kVt7fZ)YDpVMKt$X!)>}3T-IxEdv7s-;G&PldFph-By>oYdfKIT^d=Gz?QYoMesV&4U%fVlpge0vc_Pa_$;KR?A8qI>DUm6b@f5!E2I z7<5DqM~mfs1T160Wze7kJr!?B$jdH3oPRwK7`M&&5l;ME@vMwYaZU>K9)oFwD5I}` 
z4BqeT(cIM3-|x&hvL27JFG;ulJ>k?ilQf(zF)e{tG-`PTKRSgV?qeYVPH3)b8rpck z2Ivrb3&dG3v;5T@XP%P+`{jT5N5F~4feG~4Nz&@g@9+3o!vKwJs;i?R=VdqK>`EHt zY^B}DD?mKybS0!RilO9A z*(Vn05sO++<<6PbI$AMXeoZUz(@6n48gX*b%pV=uHL4ho2KJIT5OOLkSlKK*mI;5= zJx_BC3hGiAxjQ_x{rp*yxAeuQ!-9AnpfFJ*_Hp@I6>TL*8-vYmj0Cs8$SI!8LKoU3 zYmTe5YCcGIGa9*eh#0OL!dU+5m7 z^o=CBeSElMbURsTGGvn^Fz$Tqd~&uS-3tq*BIo~T{enG*+Q{xJb1a)x_Ju$0j&K+o*7f?sU zKq^4-uiSt=JopTGG&^qfhf;pKo|LBKCX14eq40qZFbFG;&kD9%D1-z^zQx*)cfM5!{hU*BujlAB#6+j z_FAIzg~^QHn(tp9eeR#G4yZ!k?zO9mj2`y2+zGYPW{N-gI;Afx5G!R#sp9Skk$t&) z*>T+mhjV@p6mUBhUHB{fj7U`VSijk6=Lo!J@r(HFiSuBC;F^v%rhWUxitke9U)*Ed zFz=lVqb}xk#OWktzjE1zC^Gi*`vH;1PX=7$qT68m)o$hz{uu0h6Prb0<~7K%%=cV~ zG%c;lCvVZAGQLyn?+i>Xks)rJ1}Z|a#FoQqSmRxNMbE0e4>;!}jf;7t`w_ML z8w{u;HMPPl@uKC4j;Eqg8%Hh|=Omv5N0Q|YY;Fs}Xri+3B*w#Gf1b*JjVTA_J#nru zcw{5y@bd1w+648lZRKfYG$?)1$KPKfNf?Nsl*h0q_stjgilk1Q`_B&s{A@F;WuFDq^D%AckSl>nT?qlHLEsWO$)QP|J&34D023OrIP30F!s>9 zI_^IWMv8j0Qk#Hd=-W*+H8oXmfo6%Csohb(8?jN{;KBg1zkm`p+M1a^ZHQ-03kA~2 zMkuAJT0eqP?;|U9hK=Iel=cAkcJ_rNDv-1?oOr5g&9V}>-o#4C(~Ud;IV%j?svC>%R?g5}ZT=idU7b_x&vrebY^ zhBSM^b9v+-{@SDiIP>CozVgR1D9%X&gRN?8Uv>kpSy(6dE0^*vt%_zyewli2SE}5l zt$xr1VI;ccrG5w6tvHpfU)tx53fxFulwYW|9#B&qN%gI86n?#qyx=ZbAE7Iy^gi)t z7U@Ual}hxR{a_=Z(247xy(JLAc5?>qT>Z zCb;9_sL^KrB(S>|`XbuzE-9s9Pe&y!zuAYjhwW4T!_9>6@u!Vnmp}^^wDQw&XE)zrly?UCb&9Z4UUPMY6+1$A1dBFO>aC-^oZO7*LJFWG1#neV{`}v#dhaMv% zgL~p+$Z7w9f-2Vd&`|hHNS7C^ZT`!9TT6TYCC8*(G!{FppSMtt-&yM(XeYTA>!aSK z?~;#R%|`|d89?FCH@|-_`y(=yy&OO9m>#KGIx5~FKeE#JUHR5iJEAvqaC=q$?AN4| z5LxlVv$ABVPh7#i2_JSNF_@(jeeVG zg%B4f`FNs-*XWTo%1%kA$QzcrI+OMv%n(wx9FKN>&)Q@G;UIL3ApsSgG9@HTr_;GP z(fmtD7qGnf^mJG8gGY$Hfoi+MspYc^e_~-PNuJhGrSD^PO`|HpOETValhakV$zT>v zJkProWzN?$@n9zGapG6V2gg<#6Zr`3qf6*(49i1l==K=>?UBCtm!jt(QAgnrrf zf#l~Mk8ai|psNgJYEQ-2e4rSsv;F+1cV4CV&o4|$=JP8ef8=cowG)5O2s*<1Fs&;6 zt`3aphlb#V*5@*Au78nRQDZJl-;@n5FIO$G?QD>Lj`J5r@V%2QlAwbhm(CnZyxMHw zthUTf=^tVu4{cJ{_i-^m4SI~GKyd}ihbhk*nGz(!{%@g ziESxaZkhSe3Q7$5rtg$Mz#5zBZ_J{ 
z=v7HXOzddFI>2BsvgK&}BuTRs-=X08-ue&d5e~4UV6-9I;J%=oSv)%v^23`ep*}Qk8C6dG5vcL^|q#U z$W#3ia+FZ0fNkW;d;0W3Om|(iD?G#c1$CZL2kE*WuPowo5=k|gAK?T}lg!?L$a1CQAtr_<|@-L~QM|C+1>E;j;@t(WMte41b0p(YC z#+=0F$nfy@-j#sUS0XK;*~`?ydM`Nh0oNaPOf{*@@1N1uJY$kn{>Q()R%2GAiHG|7 zpUcI93%)2_WPUj|{~a7rMw9!wEp)c@81+_wE;S?B6Tl7>^mvh&;pLFW3ola&uG-DY zCwEPEib1c}MMHYccOx{WS9TxXHE&etuc@Bbly40!3xaPP4n%gc7%hNm4Uf#s@F8ck=NuMk0sZwHe7Z(&4;Cn`d?o2De-cH)rA`0JEO;K z{xSHP1g1^%Xw9S?7Wf1hLl%xu_%oAm)j~FXa$tfyi{<>PfZb^t56CQwGB)f%Ju6Ty z!bZ%V!q@ipxhg`(B_#Q45=Zo)sBx<{)j?84kF*Hat20fMguJ_|=r^Fl!f!7z4;~;u zB9<2>3T5sP7w9TVs^O8S7T<7Ap~rZ5{c5*pj7$m)&b5PZwRElEq~RAD1uV!&`+zJWnuM;pK?tqr`?ybW^krt$|J{Z<)!Ss(w?Eq4!}@(6%B991t&k?Q{EA z2|qH~k`8x29YiHYX4DpK!g`0u(Ltfo1s5t z9B2M%Edi(a1y1JHso+ssZsUmm2u2x0i4MC1i|a7*Ms}rM=RKm7zI>E#^lu3U0k(x`~9FF$fPE?Zii;a0CD7?h ziDe=vgoqVe^|m$ramFnDj65v_-BTPeHF>9%Ci*}nn0M|*^N(}zDcJ<3EGPbmUh%Z` zjO()x)B1%=tfCRKfm=g&`OEg|5mG5}2MW(lyHDH}Fm*Umc%7#QwFf&#fF{93dTb&m zjt3#AbFEMn^UWm6of|rrntr(dxFbqxhEzYQCDu5p=7mg4F2oEcl3T}@K?XXiNc-5S z5jbt(I_LB?vLL7RXkd`48;NapCbQT)*o32jvmIF;LO&G&ItGFu4P8Nwc+)d$0D^5oh)LzoYwzf(` z!gv}+7@f)W^0E+#AU^sNub^I_r>#_(FY1Q%hAtWy2l+ACyh^Sx5BjX%WhY7m-5>uE zSO;OcTJTAf_pO8(@ zflT{tz!sXs0TFu9MJzExy@*{qN6_vS)=1tf$nXnbR;-d z+ynX&@S00eH6g?meDo-O#^Ag4`P{$<5aLv#=!5Cj2ifM2kt>(gLd6@v9;T^WPCQ4S-b$ncpaJIB$Lh)N!8h|LS z;uY;Kv5GD#OBr1e`mQ%6U@^LGz=ZmYFc+HXCT$D~Z~3AL0G?2r)!4{AgN)!jE#nmT zHkkLGK0FYvMBTK@gF)ae{~o0BE(Nd`0ZtVv^dy50e^|gS2b*&{N@aPliud|ml3X3?*mB?rPXygw&1ojx^I1mPPx0 z55UFV`9C~7P~NYj^YuKnax%Bw!+C-7s!U1(iMYqav#^*bfkX@-Q*OyOqbo!Xdd*l< z&=61k9x1>!EOYNCo#Hw+0AkxGntv)5T(w^b=@Yr(A`)Wg13yY#K00H&XD*| z2)Tl{fAV7}ggnn-u7cn0e@=eMS%{x{mV2WIYHMt445rG6Lzg9GKLvW^nftJgzry49 z%EC7i2AK4ZKmmT!vkOVJs}E=r&(dS0F(mVk-g*&H027@s~>6bPbewJqb()A#{T6T6{7=AVnb~IfnDNSL4Uin|JnA z>VenIzE*jml>_*h*$;V<3a^I+8gsnGm;xQdUI=c{K3c$8WpSi$?RxLimPPN!nsEkQ zav{^<+vz^n8=+;8Q>^=_BP#7r%Wvq0Dypl0pLu+BLPxljE=-R4ZT=ltixMI3Vr^wL zms6H*CRXpee|*YPD=fcY#W#_Ui%f5H;U{9aVmBxvAr~7pTzj% zu?r->v;K#Zxs450uzYN^P@q$@f=2=o75p-#!zy?3i?Ld!Op*fN^I19CVjBd4gMN>- 
zS*oQ4P4Y6PPq@M%NNsJd(lLUJGZ6zRomvop@v*eDWYj-iX(nX9PHq>9CZ^pJ?GHyC zMZMWhP=?E@_)V~^Xb3$V3u!_!++(G5jX!zj{jRD0-7!hvK| zZ`AlLq->Bq(XGxg$&FI}hSNiL7WIbyi89O+ecbf(7B!BWK&3?^U-i(1c+<*Z8Tf^g zO_W%9)B9q(K&=3&br?coQpaOV*{84@0pvj7m?qsTZ9vXPJe3--6?#sbXtwec~)z7ytk& zTqKRZxP~8d3w)NtIqQ2N3I%#MLYdjXoet&Dt57^(u`T`_ZVmm8^PaejFuFadg=_Kf z%>$P9Xl!bLn#R)4JM)OytabQfgL6#~D(7eCLpuZcHdhN{%3UXccudQBk)S^+(4qbW zNTX9ZoxLRH7A=v3gdK0awoO@GcprB78x`=kvhkFCJ0bDoSM$%>==1&HQinn!)~pvx zq<3#a;q*#R@8?6jx)4)y+Acn^q_^NMNiFu8hV;e(FsQ6D0Gk7~V4JA(om(25Dl;KB zkMsYv+@ot)vq&lK0?gBv4z%nX>Fgo-47!wG%Otdf?r8>L#?{d216agc$O;HVAg@k| z&RQ|h!u%x#D|S#b9;hWk0jooIR>{Nc-gO}j>RsepdXlSsei1=RApPpKJ0zdG=t`tU z=T~6J=Q#P@AZJ_~k1of7p;6$NlfkiV7X*EX?*v1VTG)~zCo9V;OgTPohUFtyQZBHR zDA|GK70M8Fuj2f_{zu6H3tx=MTu8zg37jVxN1vdC<(*$ znErS2EU-P~k*OI;M=s|mq7o_EhGr_%#E^zFddbszz#`+V%VjA@QD;08cEW-;_&bU~ zN9mvG>fb&?TKm`sVVi)5^y0LV&JBN_7~73D-wi#MLt+FGZ~{x>lm)caWVBoLVK`%y zcxh$#>?L>b1S?V5!0WGaS9;TJBLdE!IyR1fJ>@;Tn52L+x+l8%daBFoj=8Hq={^r` zvp$Sin$~5;LNNcS_hZ|xg=1*S-zO6<)zDqsGJ%ig<@bw2b;A2}v@ZZe7`PfW)UOC# zRVtpXNKyo&WL*pO-2$A(OE^r@`ha^T{3WeN7JY(U)xhPA0@0`0B&(|r_WZIBfRtA_ z8b#8ElF4@aOQ0SVTLN&pr7 z>}mZcfD3vwt#LxV7sADX8+$S?PZtyZ|1RS_?NEKi*^j=`3DrVn-rB$=L<9s`NhOIY IapQph2XVQ?jQ{`u literal 0 HcmV?d00001 diff --git a/doc/img/comp_graph.png b/doc/img/comp_graph.png new file mode 100644 index 0000000000000000000000000000000000000000..c5dc88b2b882bfb7731d8835e89f0e5074ab57e9 GIT binary patch literal 7954 zcmd6s^;=Y5)aVBU1nHKcQ9`;wBpezX8U++3C5CP#1{k_UYUpn1ZWu~Rx*H^=yLr#| zzJI`d?r(RVXU?26b7r5l_o~lYJ49Vo0sk5J83+WzS5lPK1cA_uf%83Vbl})=`i2kq zKy%VmkOr0hq1gd0Fw7w;5D=&$3ir+k6S&5)SJZO?fk>SHd!sFx(SHMh=<=0hA==-* z>@V7>Ynit`J@N@wYUj3K)7BU(tX;9;ac*Zmqv%lG0xu*Ah5aEjFUl+P_hO_lATx_S zUnV%l3_V!j8eF2X+z;OgXKS4yaXU~(xN%qXP|R^>*i2)%kcUW5`WIkdCY;h}7PYSq z_oc^pN{xpMzYej)9llOmK22d|)yU4y<_G!o$(?$mgP`bqXdrYVQV;{^|Lc=GGEzFH zugb9rwWEIy(Lo_6YEZl+FyXmFiId9^t^Sjh?;oDH@h2ibY87yl2z<0VbpfgD006locQh}eWBzfz`|z8F0`LLObxD|f;Tra%-$ke 
zhcCf=?)czQ6O@TCSga+2MCF4r;{Xm+y`A$n=>*s-wNLjCrd)o9zbYB0aj$xVvJ$7INw;hM`rlUzFVOa*jS>& z>|n5k&Lsb9ct?{M#~iTd#y-6*xdxL5#`yK^*O2^Sdg2X=)AlJwS^=Mt2z5(LLfch@ z=8$gUj*TG#-KktA>9l^xrLZ~DpVcXr^@D@C_8-!!Fh(nA2w7_AQV&O zmDA8Z8frL3y0K5uZHTbQ1q3QR4iNcbn+eHoyg%MAWNvdCgKeg_I3a@Mtyf!EPvCjK{%uXF_2if0P zwM!2&5VNveozH5iYMzj$04qeLNx3UxAE8=IdoHUUOhQV#DQ|mnvb8}%`g?5Oloj(= zhEJkKvCywo2qTz=smuS6q_;7(v5?!Q*d=$N-v*FlaK;1r%hv@7;lD&4Eh07>a5IOM zM|NjJp}>!4-+!IEP?W|MYUJDFW0$|~?aF9KRQVGBuf}@+{;wwoKOY2boEJ{TLR|2n zBkBsLXST6tSX#|3U}NTA`llkCXp4`+fi#{4Fg2zRE3rC9$4rT^RwRc*Egi_o~;&?SF_I7=kM(g$J&=*JZ%$d-l7_n1Z%NXtVvTj^#kY{*qP%oJh43+t4&=R;$wnXiPI6=-P3bnKqf`1bKo#v5T_~AM|>d}&$8?cN?(tAn7$B#IY=vpn; zohiBQXzL0I!KLK>TR;~)_5>-!vuKdzIWaZWW9Jv!pwI1n_M;0^ZBzzb#vf^j%Otyp zHps#AEk%{J4NbGh9lL3TIZKD4v8k!)8vls(7YYM}#?&PXt}{I7y=7Wvx^MEm+89VU zrDphQo#?kM@wfMBe|R_^{K=hC3QW3q5-vh?*{POw?pOPX2TwdnM_aqg&aZ1@Yim9Q z*RIj{djItsHwj<&-$_DNS2jni4#vjoJrM&5xx(v@Sk=@$LWMpJVO>ftP zZRT>+*(49Xuq$hh=>4@5AqC_%TUu)IxZb4c>gg%)7}Ue7~+)a5DiTttwjQs6p)wVC#l);rn~%NT0*s2&f{4P%N33&a zJpa?jk7Uo}^z^9iOJ`L9uXhqF23%PE($Je2eb|7HFRgPmLlW{-YSpdJ6jt-GP3{E>!3_<96$P1jUe)|9z1A>WYv_z6`_P&JCEfP4 zth%f}{;O_`7k%YNNRCM>!t?LRmj9-5Q{IRmKQUB&Dz*3EkMdls5P&)&LW6H^+}#I% z|1KyfcuP!7mqSS!{uhN3usE#vSYqbns z?=r8>r6mnFMWIZD$Tr`9hH~50z#@{e0$Uf7T%B^|cjGX0WHN%0{VF5=VNZ4iqE9^X&(^iFEy)8tFE--SLrJah&TW(Omgn+mIQ3Rbh~ zLXg-6KM2A)B5%eTV;kjN87FKf4$rlxcE*{(BwSovXW4lVeoV#`;t~-zD8hnBLn2bh zTceH-4@zc#q%{kIGw6A~9-nLs0M1jc>=tcA&l*JPmnoc`Ts z8e$`FpUC;QF|bI}pp0RyObECnu&jG5Hw^q;{=iTye>s?{KkoNLP&>SyH6S7-L6jds zh>6vMaV#@CvjYSK1<5KDutS62%0LxIugZV_{+%JiER8}u^v>e%sLNuI8A~@ZadCQD z&1=YFfQ_YhnHV`e=f5Q2!WSTwF5ZOmseCcn&!QMOZ-O z-AL=@*2PP?ezc?G zUixuwl6y0N+&3H>It(qWtW4y(SQdT|m=W+XB5*npsylC7_cDBO?)|f~ve4tnD8h0x zGkx#h&8A`EQi!{s1uN6B0~7g##O(67NW;O+)wJYK^psj^e~Ec7GGgDkejFYiUXC>U zeth>5v;B;bMHUb_)RXTxA*Ro31W@c5!h+ZNkPfGx8)CV^CG_ku}(#pu4h? 
zAN;7rMpPR^pzZ4Zc$)sWXCK~{U5R8%OB=?Pexwq-C@d@60`MD~k5MvV;A@3$K}SD4 zaL&1Y%-(Rh)$2hqw6wHzJYVe=VJTi)1!N;2)$OApiM#x-W`2g^MCnQ^Ijpp!Im93k zJ4dUXcUVCW`*<~aAGy9XBe7%`Vn70Z4LH#fPQIOP8I>v*U>F-&C=g|ESaG)vUByz* zB-gWF0@lyXt(vQ;a-{>avf?pl-Mf8%^B_ltU)*D8ES;6NO{kp9&+Eo34-F27C*-T9 zIyNo3Hl?Bv%Wooic;dM}efa!&|I?=II+@snjWG3miZns5TNoGn7u6Kejya4`BDwY5srd?`~?Qy`<6nw#I+_0brk+P1ZM>E3W%v4D59%WdX`t7f4szCY!-6U@!b#vIa; zl9V^Fgs@xoxPzp(05hK0-rLKG#!XdX%)}hX7P((85b89cA{thJZ|^v&fO&_41&;#WM(Tr zcaNy#ZsnnzoR+qEJzy}~>z_f-&VLxcstE&K4EakN(NH&+Yv*AIvXtz8oP=D z6SMjeViWW%>K2iVM!THMerxex^F83SD6i8&^-zisiA{k8aY+Ea@sLR87wR< zi8)?ix8Sspw=l-bDaLjiXND`@6`CASLAF)2|KPSv+Xc_iGfA*uc?|Y6YJSd?r|m8f z3b;v)$fG;dj&2({v;!6*%M?S#PR`DPKk>@uM{HhR9x^(UQh)h$-igI~fyaAdIPbMA zaR*RHQ918+h1VoYS&LdVh*GRahQ+Hz!qsD=3#KOZ?xV>C*PesHS=mX;6gVxJ^ZR)s zTXhhz6xDo`SL!lRA40)xI~oS9eXT$YCUw;++}y}2s?xe6qBWNq92gUszgDr#$XWTs z@M`ph+Eu(_mO{5@r(!rx)TFxRgnhJ4j!nVq**UL)CX1f}g7zi%rbAe@)kDX9hst=g zbfsMJ_4T3w(#Q9gS?`f}9x|LsKeD>RC>zM%UExQIM_!Xlgx1IloEW1_N+;e|g#TWI zZzi=gROawX$o<)8IKimN%c*S0zrFJ4TiOqMA17bI5t^07Bvw@RrKR6)mgrdf@HLqLi-A&!3Z>HegwxYg0P@1>k=q4j;&WRzFM;Y#w@l0FO**UE^t6cW z7Gh2RYm43j0%44J7TR{V!6Rb3u;Mm^_P=U7_`<4j7WOH=@yvFuOin4Z_mniH$TL8? z)pDE#KAkc&-{i)#9F9FDzu4O5k$)|Gclk);`6g{Y1?kxx`t8))D?#(9^kdBFK_~{i z4%^toq^h!#lj5C~6^Bjh&2F*vBj??DnYRS|?i&;eb#W1rry-HMUoEM7$X@?fW-ImG z=p_ukx85{bN&L`21r9gZ(9enHh@G7D)~%GS_J#U$mF*S`xSw{a!mQs2j1-U_|4zqD z7sbZ#rAOZc7c? z=w5iRyBS2$JN~9uyYze1hNv^6Bo|hrlGEfrK*JXg^lMlD)@=Ul!=gd^S~WXkE>qZI zG0GGn-x-Pvi!$g?aj-w#RDtQYxKF}rmYUq6=Iok}13Opiy&vH|hYR>w*~{zs>F~M5 z#rw4|9u`(sZ@`PIotBPda<>f52y0Bz8hgrC;9&AoQ#g~5c!iQ}H^~%ye=;t>YWY|} zE%+C5n>VLfZzDCaz>>aXqTGuyv#l-tBr7+!d&l{tNmScnWLQKwK_Flnf-FH-<3 z6+u8FIx;`sWHyup@}|ZPM!@}8Vs;YC6f#l)?A)9>owoZnj4!G2B5KD$P%PetKa7%x zosBKW`N%+yN{`Rzziz;H>P!k!N#_rl+$gW@g-`ZlI{|cs;W-Ms7%#cG4-$*@&)mg` zKLZ$z@auo(`AbIHbNNDiR?!Ic@Rs&%p|_Bayflft5+ECo1^|4Z=a_=E&4d5fAklitXH zZ}LBW7qMWew@&>@E_~So~x>#8|`*Ba(-c*H$C;R#p`WLD~*g=bqg%__xG<`axbo`I=#C- zt$r2v)P&1s!NR&J_zyIu68x9Pea{(-2qS4E|ZvGjrmG<-DqM? 
z(I#@Hp&;&g#hY4bfBixhTlIZly~;#LvD~^hIrZx8SOIM}x$Zw}|0?heE4LFcuU*$Bz}@R?7ugrfBb$*|a_YZLE&ugj zeu#%vO}xTc&<-#kSu)}Wv-cjz0TLw=WOD~ty^9_)%dTW%XDHU!I-m zmfCmAeYAyS%9o8>x#+D%6Pd8RD$kqu?ooShyVxi+SX7H06%tIkAhdXAe2vd)CzRl? zg*|UA%+7=|1V$rvA6N(u5L#T@Jv_`GU0Cf5ic{V=SZo3a8C40Dfy&Yp1zEvh1)(_) zHWj*8Exb-DDk`zHi;Iih-QB&Bw7BcIO2HwDvVH`99T}R1U@&+lMSM_EM~4y<<9>Ua zRVk60E6)Fk{s8!~ZT=kR-xL6cVe+FB z1Ar#x=jOH-6rX|~F`y{3sy;vINE-Vzu>eGD>k*f==GVKvSLBOw39O9{u3N1lxrnXYnj-KC$KP@C(YoP3c!IM#qF z(5IcN3YGg6l>3sBz#&VP{q#hnycNM8$M=(qvY2Fre&QTqTW-NhLdZ z0$gn(&Gq`+7o&K!MPr7%vZ$i|L{uCB(c7ma5tE%Kl8dcPzkyZJ!t}Ka?qgsv{D|6< zT#WT1xv_1{kb$;4kJkHf31%fpkHx*dj~aQ~x$kOKGZ#TEVholhGDbJ^QEmI#s#YhH zv^+%V9E^jc5*xcXT$U0Pd|7hqJkHi>%CA8T$qol-zr0Pxza9|S#QArH(4up*&1_}B zGBpZy9ypEWhxD1v$obLb)yVh$)w3>)K{{s4_a`x*A2L>z?9IizmWP50`(H~#+f&Lv z&9TMxo7-Nx?oPZ?{*kQ^g|)GAwkU_C6_@oE+OA)rGz65aT%T&ol~m$sC$RhL)`c7j zh=u=GJR&sa?p}KDh57k+N|5}6q9S%>{HdCP4D6X$#ys|W3_i3qdk3q=owun{-VaAd zy#I8_C|PyS$v9 zn$q-h47zfq6Seoj03V5gs0%*jo%( zjhK{FdZiDE@1KiU%5^mEtA1o}FGw~P=P90+|0 ziHQyH`=3e3+C+Nf=@^|zkk81mNsH0FOMam8L5sKarh(bwcQOwmopbt3j9H!UlWn8Pzo;aCPD=l?8Q>8qx88AYOSe+G_|zs7V07R;E)9XLOe9{wAq~;9k;Ix zs?6~37XfRln;%O1*!+!;89fY(i~!<`1VvXcPyRya0u!C9v9er%ylPMLqU(m3n{NSx z+V=S1F$Z`y85>|>IX8$gU%HBN{%t_j;OUvVZ{rSW2?+{lkO0u*f}pyC!};zcPwN#z zIh?5N)rTAi)JK%<1p%G7q8vtU=gs-Qd^Y+#`EE>RIRO9g`B3X`ASjSAdRK?-x66my z!(ofBU8ZyM^HR5a0$_)|*{Yyr%nrZqIROW%WRLJ{)ZXTeBpOyFCzrf-P=?+MFwn2F4glVWpsVFdfojsZ>}b6-**Moh(i?7xmr+C;v$v`jxf8wIT!ed=&Nr? 
z)(8CfOQyQEj*dweF%uCf@k*`6<=AG`T~F0d0}e$swduIn?dpJC4EWYBpcxG2L_%3$ z`FVMpo12=Z*rmU7oUb;M?S6gWh!6@446LlG0$O?%5B$yyEKKdOe0-{Nwd;BAUX3Zs zDJ_7(hINktePv0yM#l|hxeO>iZen=MyC@!=BykW zXuZ-Cz=L$9uWEtqj8H6kfbhh-FnDp2l-K+4^{WmoKA+l@Vy*-rLG&(tOD_`j|NR;b zPXW<)VT8xLFW7QI&8??9JXIt%M(-IIv(U@#jC4frVPILI{QK%xO8J63wEcM(#+|fs z=w($JXq}M-^cFBKc!zEJrlU$QLom*BJNuD8VCo6DUJ%Ytf27Z`yI7Lmi_|81avw_I z#!Wtv=?X$uYaZy<0|s#WQH97f?V6T!e&T5fD}yHkiw{*bSoumV*_`gxBop(BIlv5C zRLzSfMbngdTkk*rb$yvI9cSji1;V;a4h9L-bE%v(zR255)iYj<$3i_R!g(H}uMY2l zNz1o80g%AWRcF|F5fzz!eKTd_luDojF+zXt4DtbK|KE3dwSM+J(9iHx@Pe&NbAWwp OkdmCLY^k&%;(q{&VmCwp literal 0 HcmV?d00001 diff --git a/doc/img/comp_graph_folded.png b/doc/img/comp_graph_folded.png new file mode 100644 index 0000000000000000000000000000000000000000..723ca7ad6c585aaa7c582eb5a557c8b1cdc3e58e GIT binary patch literal 17077 zcmb_^WmMH&@a_Qwkrt#uKvL=M?(XhJQluNDrMo+&LrNqhrCYkCJ0uTqH@^S7?x*|V zu655^dVsTkyJq(6XP%ikVM+>;s7M4z5C{ZST1rd>0)cA*Kkp&Jg8vUXd+fm*jFXC_ z2&8g^XczqQ%u-lh7y_w@Mur;0gWq3#kkWR7Kro%3{=qC;khnu2A~Moq!fNgYhf6+c zsxxhj&{Z4QdU){wb+YB(swYjzXw~b!QT}&)R|N9{0cuNKJ0((zUI>nE86x$g7$ZuJ zE$)nt5&3zo`N1{EUujII4!nBSN$l3S^C?$-gOwb^ePGIu>`Z)Mx^&AtgkNSE{?;pq zO^!*%{xx`YE~({2DHfQ>-ELFaycIz}K!6DYW(n!wDmpd%_mdHw)X>vkAV;Lka8K_| z0o^E1@2hB-tWWR$y#JpY2+i(}z02!OjLC>sRnjZuygaQ!NP+wtOe|@{=I>wX=(fZb zHtu3Ahd*wxO`rZv=VfW#gH@%BWJA~l0nDp9(d~A5jHY4P%kDe5hx8(Cnhfm) zUM38d(7%s=y@`^SO2VBXP$=L2LR}w>W)Ek?_irf}v1Z)x9*e=mT`Weh|NSMk8#)s> zyGHL{L-w=_LSifg-?oVi8wXiUn~DGTfQY8YRx_G39~+d6lI#Uq4L^0DV1S10ww&t?f+=|at&?W5d4f~r<5VQ# zFcS=g1!#k)Yg%cTxQj+%R<-SEQm2&%)Xi^56O;hzMy1=}N|I!kdP?>%R^7 zn?H6C|9di2Zk@;+W}(!Mxfa!*`M>!W#1+=4Kk|sn}guCu`wh#G{gQGtT8Zn8(#-!+4(~eByepS)V`ygg9E-g=wz#722i5ntO z_jmb+dmfGMp1bgN_N5K;kLOSQ2Cs(A*^iZ0@o*%hzJI|@$70IFdM!lse`KOw(I@;2 zv{V(@>9%#4(saCnV!vhS$q1i;bBORi}^NPcewlF{+kgy5m$v^=hbQ|y3LW4=P0Yjs&Y>Gh;ak9{j2OO@r=@LNp#1oWo?USTjW^HTTE zR;v7J;_m{dXP?-RIR)R__}J$C`@T<4c0-lQ+R7?)r4@ET=E}x@$Kw}S2i^V zB%q}n{3p!w6AFB+P9uwx6fO=jZ38_UuAiuTFEdq5=bjWa1*wx_Yt8GSpk?3I`*D>< 
zGwOq?g%K00yL6IjOrM>e9a~=VlO9rKlZG|-DD^CzUk$PinM$d?s;d+k8bVo)?Pr+_ zCQhaXZH$l*V^T8X%_36N?I}`4dgxOlpLWu;CCkoe`oF;*sJGUFY9vSBajKO6@UM6b z0S&ml@{97a7G#DKC_fN_ZXogf$K8vTCFVd0uz#60N1k%eJa&Nv`0wZWnSHjwduju- z?qKpdi$e6f?plAIE#44{DZbON;C7zhqO3-888h*d`IUPii1oC3`{Ns@ATSXB`^zI9 zA87)KCo|QNfME-c=y9|Dx#F#g>&K6^BHB4>W>lo~|33U@CLxdylKyj>Pvb#4|Fb^- zH=5TX;Z+sC^lcEbZ;KiW4}R7WNR(f2&u*M{!-iT zA)7%bG0y041}k(MuRn)(cXy|x;I%=Rkp3AIQ_h56zqt{Zai{0rO@{bxcetAYechUszCJW@a{&EttK6-0^Sit9;lgPTfBBbS$!N zGx*coCJ*8mGQeOv^JMm$<=LHH+er(!6R3pQPyK(+-Cz`a&mGiDwuYY_YDnt#8gYp-vb-VG~o{-m`IX?zdg%m9?a=unQnbI1@-R z1=4&Rdzm-0&*-x>JH0e`ydGLCSRbB)bXD2|-oL0WoW`BeM@0jb^>AxbgyOW1Ty=U%FGKZAy8R+P` z`}M9JUy~gWXlrziR#Gb;`OIM5+bKFb%@ECx3=c0=8^b+KXzT;q>$!B>@&WBz4SodH zkY_l_&+q6TFI1@aE3lqz#_Om;Sx1LyHAzrbibBplj-*Xc#Uh4z*nXz8IAWr&)2Vp0 zQ$F@I9NjCA&VSfYAaY}AG#g#^2^CxGb`12s4*A*FK=(ddP%`w=;k^CW1;u%}&Jz7; z;9#fgo}HJ2T|T3DLqvb*aAe>2AB70Iw6{RBLPV4Juk;;;!fcRBcHc5GZo36xd&vfg zJUn>VSIAOq)x4=NU5IQM)MJyTG@4VUpYn5|x-0sf~{@uZ8( zm9585I(I!P+I2dQ+Sk^ed;9x0`fK^4N^x~@(SuA3#+c`+td`{(l6Ys{c)^5KbnYq1 zQ}Vf~QNF&ofm{!W+P;dv$SL@27$7O$KDY1sBJi-d4V4!<9YkVkwCFsSa2fe*ime8Z zF?BRL6&=@e)6xjqh;VM_>wCES_t7{`&GE9C!frZ)knvB^pS=*3``wwnOH`Aa`-xGM zCjSc^hNKiYQd}YsAxN*zu(a=FYngS0aP`j`Qi}L~v--KO?^g#u#WGh#%ZCgki>B8! zkWzD-502j|#L~PyzQ)$qisF131qn+PFle z@1$f77h|*&$ZMIiFHxc*r&pBo?;a|mhVGBTjy9j`AG3yqg?Yn>;HL{{pOF8lLeEoG zQMrbN_$wYDRHPvf?T2F@73n6uRaD zCOw~JnvD-pkKkJ2>$?c7864=Y!}(j3!=;ww73AFLy=0%e%kh^VuNTC9Uz3seWp^l) zd zQN*%}iVjUzi_3#7hmqIEo)l20*VnfXtO<#aUW0KCUU}Jm4>v3KM{Zgp^h3Z@Yn0*A zX$eXGdTdG)lMLO~q@Xc2WDykAPK}Cb7l^T)t^Tx(B#8*Qwsem}X}p3qcN%ROB>_X8 zlHxwXb=6|9ru*DFAgyJqT)!o0DHZOeRJD3-rjD-e{{B9rUX##59DYFAmLP^AiFa(e zwry^Kpl8uW_I}Oij?W)FTjvc+$10y*Hy`GPPwz2L%fBV=!ok5MGwA0P*VfkV$f3;$ zbRrPr5kwZ1mF;V`-%gin*D)Bj$?xlG%;D6zL0diV|@R9Gim@@7o?BdIUG@ zBKD8mpiYNzy#vUJtwG5ZTx$j@js8xok9~);=Z(aoFw@rT?X*PfrTM=TCN#_A_&L_F zSVbl6oSd9QMFXf=w~C63EzQih`U=v9w^%Jls)r&Mi4b;-xV$dQ6^i4fD0p2Da@yG! 
zq(eeXl~vI#mWs#&(a=}~+<{SNC|e=ThNc&E}FbR})Xl1jr_hr9%)*`8)|bH}wGIG+0X zY{eI6X2t8Ma+&=I_si3O0WjkWcxqvEy)A%`vE8Z9}oPERNO zHiw_9Vj1E_x19QPu4u@j8$DbH;$k5et35p|22x97>CMCpM~f-9P=7@3HsvL5b5EsY z(`o#5rM$@of@Ak+dV0-XqEw3=LfBmEnAA`NE2pHSokGM|P(k{$Z?X$%F!idr#?&+oz8!-M-M$r$W5maV{+be;Fj zS*zF8(Og{!n&X!dadGhrs~WBA;(&NAOTW8e+V7KnZn#v0x@ls0Hu8sge-6bZ6}(+* zhA|lUhb}^B=;>Si9v>R)7Iqv0n^0B*#4`p425xUXeQ_brAn9LUIO6lY@&xm4UZjgk zEbLEwm&o^X`E8s`Vcy8>(C7Oe_%B0$v|u!FvQ4Ps!)jUXU#O_4Oj)~7EUMBapjEti z_f97FaCH#lG%r6tJUskm8Vu~Pb-hY@4=#iXvlR4OvuNv}M!UlGJ`U$SVHr}P{PUUq z=}6OvLL~MJUbjP?UJ8E$ChPF<(bEM;7e*Qk6%65WKdunP03H6ikyP)j2rn}$SzJ1= zwfYwTJHAz0`$fqasKioZvM0V%yA^o2!7$@UAFn^)%{M4qJfpU!Q!=7n_7h8Ue=kuI&)b z<Ci!gU7K>HUpSQ;>)_6oayxZ?{fN>r%ftsqiZNut%AfZF zZZyc!#Ghxr+3T2$^Dba78JN+%y}j$}>$9^n`E-s`MgP773AmZYj0CrW z${r=Mz8;|yhzP6ysvMQ1e#HLhYV!w$A(W4Gq+|7TF|&WgsTr18f8%T$VIa7p<>Cuz zrRbpjz9sfjtxF39w{5#byaqUj-*`mAY(NCR%D}HSw1`nC3Ym==l8n+>w@9s9M+x;Ur|- zsz8#$;zY*1(r6t{<_k~Mutj)uwB}c}m$NnVi;Hr_sLp5f$%Ekf4=pG9@SpXn{`{i& z+qZ<_t5<7D^tux7Qy80qdgLMAPSER9WLyUC+udSs{0=y&Aw|dG;kR$!CJ>NQa8CL` z59aDFNTO%!g1&z0J}}w;UGWOX(EC)pjp*wR>o1M|%V%J{=s?&d)2e0D|Mg%R`fc$* zPmM@_Fdj8*4xVyradC0R&@YoQz!L1-kG~_l#}t&5PpE(q&Yg6kk2yN*%|?-YdK58l zgK-x^{?&N=#)ShO`hyT-6fn2e%F4>_Zm~AVDl>%zNzJ17#m;!W%}nj0Tz^VtGtaIw zPucYR*cem);qR}i#ot?6LV|@fRT+$OB*miz_8u zp%e35{OQNCG$}%TXJKZhq}Qv>{*0Bv!kCN=E3yjPJS-UE;o_2tCh>#T^qZOR!narl!FNBUP4VwmK!&m_8S&%*G%S3@14REU3uQdujnmLnxlXO~;O;D5Xm z!*4Aq34I5ghpB93a^Ze%ojPTd`Y=={6KIHce=M`YfQmGGz9|4Ke+`) zlKVMUogw4GhhWY57%7U)%}vv+t%;E4va;~yzIxiU;r5p#B!XLwL_E$rXPbkjMSnKC zKFAQ~EfS$S?_n$CpT?xX4dbV|9W912ZFApVWCCM0F(a?P#IF{Ch0f1dBS3N=+K|j| zxoxoi;hvhAf4P3-yAAf$rDE~nj+L}TZ)qeUXWlL9>nWkhY}4cAR&V@gBJ{8J+o`8) zxuZRJT+9fKAz|7^&SM$ePQg)x`QuW}sVn=P_?gLc+O-ZV0^3k*ApHbS{$}==G@iJ` z#GWranU&war`cf@{5cVl6nTQPeQ?u~V25^hcXI`Bkuc9GxLo$sR8`~k82UD;6qPi7 z@BjVID9|{6fSiA4sBNVE(lV9T%iqt_8-VhazY=~6Soc0v8>-UN>>r(Cay$|G+LcOz ztZi)azllaeD{Fwk8vm)-OgXjimSU?TkCk4tyl%|H?=`W+LZbsG9*Xm<@-Lk5p08iO 
zzVg6jG@y%>vI-a*{LY=w?y%A(PpDaCP`_|EK0fXYEi5bq8XaLEca!o(sy{ah4bwVz zzwLHn`O#{A%)k3~BZB(3Hk{8S0*NA_C*Qs!NvnmC=S7FJWT9y`yWfX-IoS&p&sU>a ze+TyDtW$RJR+ryikTO39jDgKel3oF>C4W#%fI1XyRm+qg#7oc z7lOS8M`>8}FUwq^QP%GtUZfyV*&i;Tf-89bTi6QxvgzT}jJ*63e^ePmT{e`jYgt}? z`ruP`9s==NFtx$Xew}@l-vY`#CQbVc%AJ2j5YP;PlYlm(!s(Gi@ zSqu|g9t1KN{J2I9dGLNbL4QXgHP=U|qB->!`f}XbJc7JqzRPOYOj&&DtrNNG{9hZtY z6P}psgVAUff9^sZQ~j)ZGNZubqDAAV_u|z-*Z!26JfY{1;9-*dk6$cax%50*x=U=8{WT+M&GsO?CXGuJga+G{&;Gyp||hL(zFv-@XOvSxK0nVRV~UeQRiV&Hd5L*x7IVix+G0 z3s=;nC7~C7IWnYI@aQxCYhC^rmlBNGy`*s6FV?cEDLW z-hFSuK|$~af4#P`^mZfXbW_a0?{Oh1~-A16ROp3k*KOFk_dxW6*IYnQE z9hPhF7tV4u=(^fgMQLAFuovHOF%$l^4x(c)UrM^79i8SxbvsH;+f#h)L(562P*h@t zB4*p5MaPjI3Cm!JgV>roZ7tp|PLqeDV{#wMh-l{e=;K+C7KKd)OU7`wI#DQl(e!4d zk9H)>eNFqWGo|mya^o{3cYq^ft2&!VEnbe0KqnXs)(NOa9FMu37L3cJ~_) zzxrD`RM=d&z4+(FWb@Tb0rnKZjfvHV5fZdq^ZA?(c4g914n(rh!M0p6@i2}L#6v@i z+R<`FW7fKzF#EzMH@&&Prsn5mEZv-5>mlow_557|Hj{z%5=7 z>PYWt=La|Hh{SWM7yG8Y^Y8|_q`Y=kq+_`BmI8bGdwc81!gTAHPT@s=tcp!Vzu$b* z$j5W|OycRV=2pPN3xhf{Dap$vlm9fXO6vlh~VF%tX!b@3&~ zDn!gQlHrKMFNU&*%sBO5ghB^>zeGp3+hk(zj4e(Iic;wg;2PCd3s~rRoc}KGYlUib z$ZcpKp*#@QyrF02+?y`<~A zK4z@!W8J!XVu5`S&EN^4={r5SyQgEj;!D4!utsWO_H}sh%cRVOJGjEj!+H<) zBRMluKKk!fmU)j*lbLT(qo0I^299BH|HSX#RIyUvL{jna%r|$qtfDA54fPR0@u@m7 zon-Tq_Kk4t7-OhE5FSyKp@{ZMh$iYVT?R1;062_X4ruS`*cibTvz=moh53+Oxy|yUwBYq{L}AIj>F4!g%SQo9O7RMT z&<(N)QU+ubfN}wE>vt4!xO;WqJ&n51@$I~Eg94K|e2XcyT!R)T>J-OD2BGLV$Dlx* z)UHz>?q@Btiklf@rN?&m87uNCG5u^8rv{O)adh;~iAcP9!3A8$q@BO^J| z>EFF8EiMi*SoM@;Z0)sToAC8SHQ#)Mjy@KQ$<6(;Z*+5Z9!t-8gZQ2*UOImqdg$O6 zFH=$G68^vES!Tp~-2UH}f zo&H658W?F(hPSr1EHV{1O-^`Q0>zlquv3Ar4V=1&2o#J8Tdk zB)-5cxDD-mb6)|7wQhv`$q|6+MVB-Mm)p7wdc_84i zd^EuvaEicrT}425AQtq6f)K@M`lHj9>Do*QUPPLQ-%p1;lP$lC@kf-0wya+=Ly>zW z?1fR0MzTW+fI*tT^l5m3J?Bm9tRokl5Q0doLe9`Mx&O7l04|Ni+1S`P^pa7c==X1p zND0wm=?m&2tgP`^x1o(sXb>5-4Px}%a7@;A>^b03Yv}0>;Zl8WA|id2rkGidiF)za zD88hP?6-AwyH_O`{;28Ing5ffd-JPE;^8N_=dMSK(x{Z}$b6y|qen|EBnz*p79XSW 
z6`!?nkMdp3Xq>+J!a&BX@FI*`4#46q;D*1-Dv`h={y{pcgjQKzZfkF!Bt?geeBn#!Vk(?9QgdV}O~6x^C&>HIk{f6GJ9DQQ9Y^U?IckmQ6jW1O0w zmKHhts=s)0x3sM6>R>M5CdEf6JLvw-?{U^Xb>;psjN}EYWplop$5GXtC1cFs=FQCw zba@p=J8VuXvF@V6bAHyxQHt0gEVY_83;9hO5x^gLTbvYUnYPsEZJO2fn z7En4n6mu4JJ$-$amX?!!B-!`UT|9{b_ZO;dC`3YO z0t~r(O3KS*YOIzjuzf(CXR~m(X4fsyhp^DA^W1N9?!!#i-!61q@Ft~>yf(j0QRUA_ zOx%>~z0i1-wi}qbR4bzRatbcqUqD+T|8(W?e&vS6JwS&6kX+fY8W{p?CkLY|0yfP* zzZm3}FLGLXJRZQC)LaAWhFB3GSc`eim^ri{xL=_n#|Jvl(J+dkO&BmSA7R{wMPRR+ z^WGUFE1l!~c-@NetR$7fwgP|A2iEkAf|;B_5xZ3X?Xk*99%M~!lw3(BOVH0(MP&jU z`9-~b8{nEF`mS}t&qqZ^7bB7J^Aqcjd{WiX8DQu*Ge1G$)ii4yQ_Ae}T#xdDI=U&2 zI*()3{Oz;4BX(@}=w#ULV>Km_7%X2GhnvjrS)Z$FyCtX(Aam=G)H1=TS1YLSetc(F zF6gLg$Rx~Ix1{t&*R5KWLWciiVIf!K3Lz9E7yv3{7h|;Z8Nts_I+9mz*Dy?2Pdb{j z-b9_;LYqAAOVZn^tUvtysN`b~Uvg}$Ff**_9;`9Ir`J>NQBiWO28tj~BlmkyRC}&| zGp~5(;1qbNIAF_c0bYTafe<7vw z8}ne7#Xfj@LXtUcWX+A5V|5Q@dH;|}&Vx!-IG z8szDq+)Zf_3led3-4b+f99-5cbs|lMXlxy--u?2}SBy%QMs_(QxOv6swoX|3nU6?k^jOr1OG>J$#13vYQEawr z$kA_q8i!-@CNz@^VXpTy#^(lrM=&u}Jg?xd_?@iU8Jo5U9jC(FvZd{~%`|1Vg;L*7%uqK2hoBJ}IcUa0 z%;Ov?fRf81?OtN)c0G>30QG`vp6Md_v6LWa|PS^%f*CG_FtHT`86z9W8ygq&1zdb;d5K6^9uj;VMwAcEnTps&`# zxYy+lP3EERHvh$#x^}$_nwz!r$5+t=PqJ-HB=myEX)eZ6n-z-l#{ypL@_lpJ{|(@a zev9kr=C1%4yw5@=QC!Zx-0I=)LkvxNiG4}wEWuLv@0>Wuh^POQ52o_NSMCsP zWIx_ylV4V1qI6(jU^-FcO%A`+mg615bdr}x&8*k0&;Y6`VjSjiwAdUvxcWj)ij$0^ zUaw&3Itm8wlYci+Zb?=qwC7tF6|*X~p1!_3^=rW90^{98fh3aKeQGSstcy*yRm``oRhz(gAA| z;J`$-WOZUz?pC}EkI>mLC!lE?J8U*WzeW(892*CPdm^h}! 
zM16Uc_VsmmMhj!-$N!#f&ljY*b)Cz;(;E3Ey(x@Cv2(hFxEV)NSS zj2H&ze-Ducf`?3tw>lY$i2sD8rlGoqWa}{iU!0u59Y!cH$9s#1Odsr*dVuj*GX9sR z2 zt!T7g${Ra)TyhXjNtsKm0_h<@zK{;39;sb<+*_=P*7FhG3)ANwCcTgdC9oQ-9Q1Pv z#v=arAeah>aBz4bc&a6eQjOqX7-i{<1j68v!H41Vv80B-SA^w3E%^s%im-0~XEOe$ zfjfcDw$|Lj3^7RH42rzOWOe_d5L8xH_Li+otyJj;-?rG}%cn`K(kG>hnu~k~7!@Q} zJ)fTXi0A)uA|~Z0vKq{bwv@~~U>=OFjHK$) zBR$`=BZ?y5rt-7}ixU3ibL58zQG7Ub{c9%j9k>kF!Ic#o11;}fSf#4M2LOH zCujkiW_yb!GCcO}{iOH4QITupIDUmAoUO2t8#6YL8R9f7w`tT$ zVa+XI+xz&Be9T-Lz7R3D_mR7bhS@It9GN25)cG=b?GxW`Pm@%^sVA0n)M& zyE^HM;USNNXd!sfp7RJ?Mbx9yNey2o7D-=r1W)bDDJlIrT1v~(yG zs??OC?HhZ~lVRJ+xj!%?%eybDo`&-vZVHf`s>*tv*~fZe3K_d`ns_V#ZO5pPq3naaKVqi6;OY|$4KK=|u6_V5&cS_9~U zbFbpUt6}ZNXJwr`B$5BZs6XA6K^;c#`n;iR4{%18nw>)G2`;3JlVAJb`N?@Sf;cM2 ztggqxPv8SXPy0v~4qe-ZQU!c?_EC^(@R_a!EM$f#Q=yfHcx8WNS9lod+b5J+j$&ST z8G6Z@h6~rVUo6%qpM61k#m$|{{l35E^#!hzgM*-Bos3#F@T^umJqf3kZDjgxPWz?- zr(g9>KxgG`q6{zZcpu8oZD5Yc*IA4g)ckHh!q?tduIK?Qe(QFBJoVSo3J{#Mj(r#h z9`RVSz&Z;=2oZ1|xfjy)oIxPbWkyA>D6@Pzr|;)a;nEX;J|zt`oug6xEh2*&iHD}t zhy}cx+S{`kopvf{7^<{t)_2D9TU-y%E>L+<2om3@N(hRpiF{P}^u6JJ*Iq-W?jk{C zfou-|w^f53cm1H;06*cJ{y3+d8r6%b=AVe`vww!nlw~*Se+;MJjj(@-+f04kV;);l z*s%VCMm?4)e)Cm1ZrF(WZX}cyW?ES{CZl%Ks+9|51d0aXaxY*>4Us}h%^5(3)ijb_ zousB3x>BMfelnw1^pb`Y;f2Pl#wi$Z5K?Dl1as!8`kS2jtk>_O z6^emo*%pV;(9}7A__%E}bqQI%zru=?app=-F8`mO0nfA0J9bzT%oy|<^Uw6+`;`xV zX!zA$L?(RuJU{-mW;7?M%HxsZN~CXp*)TaN-`+XdB&4f>Fp5YZw=K%mQx3ZYwc}Ev z{^MXa=74PJJGS)5!Y{BDmOmSNlw8ykbYkKq`uQZKV{g30D=Y)Jp=QTe+r;BbwqMl? 
zJ=kXF37?jjD%aq8s1qh8lC9?YcDQ~$f4&dotW@+RxBemM!=Eo2*sq^4SZHx`pot&@ z+2w9paKiM6v@CkpP8V@(Y-~+0hH=6R0f$*sv~{x91ppl&Q_ML>$2f7Yu@jS$CPu7} zet(D$L#{S8c1 }D_;h>)9k(VDd!KdD@%^4bGncF?~GlpD-~{J0dFdsG9N2yrKqVKw|OH^BNJc zB8HXHm%4$3x1r=_oOcR;^2RA|r=o(ZpSY-m>KP$m!R6OuQmkcNwv z=Q`RoBhp534=-BLW51_eX;1rpXOchfLV(0!tEE=5KmCi?!`}UM_>W0Wo5ZMeZ2WwU zngmx~!>!~sKT}(2k3O6FQnt12>T1F-l>1SYPfl;ggQ-tFyrVz#sz77od{l6BbktHjnC0Sd zp%L((zb2f3H>Ddqc2gBSGuCvLv(opEp}9T4;zae1Pxl%& zMMKbnTKSnJ#h!WpK9x<)hbY8ndVaG27ux z-YcNHgF{5T8WW8m-|al8SlRvoEUNwC)@EKoPOd=nJI5Q~pg@zP#P0O7&&p3t?(n+k+dpI&qb|m|OY#QcL)N>a1@J@M_8;Nr zN>CPWo_`Y824}9HE1`cp&|kT?oX8aRe=#2nMx~U1twp z({HS;4MM3$)M$2P&BIMcP;x)i4MaM8JbFaN0Im}v$U#wG@H%eoV_|OgIHx)UTm+y` z^4~ca8RgW~#c#eZeOUI`CI*qtv9XS}kR+?X%TM^ z3lW+vtE!sW|6BFr45?@FG+6g~_3w;fac?2WPwn^w2W9Z1q~Oxf8|RElldY)gS=CA* z)!v$4o4dZydNnb#EX5daK^48in3hvc{86? zgp6$IZ8#omfo>;6tE#FlseRBW`@1(?CND2PQ)O8EX4AAri_dZ-1Ed;%nYo}@>|p^F zar~hC1Y}PTyfWUXRW&swXV(S6Ym7g6g>FvfQsH|Mm5mQI%`7xd1k;L37&}x4n3jJbKWH+Y-urbiy@Tsa>@mre2^pE<<>h@#1HwNckGEBi-$Cg53Y&;u zwkp>%#^7-)-2n{^ZMn@Sy#hWlHuh5!3IxNJrawY4DfOv@Qhnjg?7E9>!`72VeF`0< zU}bdz=`o(IS&Vno^38sAYR-RiL~=QtKagV__|U7vb4UGbEgMwlRz$}?;wyD-tE>_) zQ{h3NsnRTiMh_l+z+$0IkmMDg5J}W4LE9DZnf|G;%{G+Z|El!x!n_uT^(7I2mI3Q5 z=H{g{G_=WyiQ}o?o$^S~$M@gzBm-N|3*xbpEid@10f2!Wo>DXPJKNE9DA!AgQ8mF> zNAeaGg`?EzOosCpshhU0sB())KhgZ0SK8oRUvlof`#mVir8SSjk#>dWJtn2yqns{_ z$?e9ErA==7uBvd|=EBzv4sj1E+tGZcrDLsH zWSEc8NoW~o>HmVN5{ocXwh%?)A;3+bbe62yAA6k-bH1g$n?w~_kv$eV7Cr8jV12Bo5PF#dAhV=((`A~`Da^|xK;gcYm7QBZ0Q;8m%+~v>)xpc6%QiIA z{06bV58S>yo70<`DCBn<{7+dHs~w*@J4JA;;kH5kz~2w*g2IRb130j#KHy$=rQGLi zzd@x%TA$W5ERe^t+VE$CK z8UQh4>GMGXR;-*RG#r<4f;{Oh3H7K3{d^=&mCI)5<2TXULZ?Nb|FR zAYA~!hwLp!QgmQ1nqQH{GC)J#-cXPk7l=Ac$FS1o(^6fHk<9j!AKbbK)9iPbhv89C zMe~5%oo~EL`+8OifU#SSohRp%Aq0|3&!G|1A7pA`0=yaV=)}ZCplk5$GlEZMCpfsr zTyHe+g|WaEVn-F7AO}#R{{DWXLKU*n*qhd~Xkayd4gMz3|KWZLj1?;@E4AKO;O8^W z74ON}St<79VGF>30kEk>LI@${YR-)I2wa;%Vm@bLXC{c;0^GIJ_K45@-*wRSp0R zK;Hq1YTYf8TtGnJ?OTu;F5J#?4FSD2$q#ki5=@`57s^M;zgNzzc_Ufk3ywA-<7;XQ 
z(?suG-R<|}e}5$=t+O?-D$(%k(&1eGI$lpg4h8yC9+80JD`FA_c};0yGW9O0cAT&? zk`oT%|6#+}q=7TZtde^NFOC`i6FzA*QwtGSf5KgK0Vt;a_Eho?ibQcG>^^-CAAB=@ zc@G~th_MH_6LB2{7?M=N1s{XV7oa>nskLf!wnUxQOtujkwnSX_(Cr(v;*<<4$4+7K zAu;K#B_X|pGYc4zx6md}BDTASUcavJA|tHT%q3Srl@vQy69yxO$4&srPFhhJoh017}rg=#?AWs*)q zEM1p7*i8rod}RT&)4rs_XwnjAi^A!@Z#W3U&xokUety$Y{;Qk*UvVx9CM%Oqr1d6_ z>x|*v%S572Pu0QxA>Bl~2SDzg^1=#dpq|e@-8JyySD6g7Wr^f;sVQ_Pv+MPtCJ}L0?a_4fc`1>0F{0aWE2mQ34PK1n)&Y o01)7{C`|JI`1J}KSYZ!-U#l}G;A}BnpSqA1R}iZdG5Y+!0OG^%!~g&Q literal 0 HcmV?d00001 diff --git a/doc/index.md b/doc/index.md index 12f1be6b8591..866bab6c3e83 100644 --- a/doc/index.md +++ b/doc/index.md @@ -15,6 +15,7 @@ User Guide Developer Guide --------------- +* [Programming Models for Deep Learning](program_model.md) * [Developer Documents](developer-guide/index.md) * [Environment Variables for MXNet](env_var.md) * [Contributor Guideline](contribute.md) diff --git a/doc/program_model.md b/doc/program_model.md new file mode 100644 index 000000000000..5fa1bc30bf75 --- /dev/null +++ b/doc/program_model.md @@ -0,0 +1,412 @@ +Programming Models for Deep Learning +==================================== +There are a lot of deep learning libraries, each comes with its own flavor. +How can each flavor introduced by each library provide advantage or drawbacks in terms of system optimization and user experience? +This article aims to compare these flavors in terms of programming models, discuss the fundenmental advantage and drawbacks +introduced by these model, and how we can learn from them. + +We will focus on the programming model itself instead of the implementations. So this article is not about benchmarking +deep learning libaries. Instead, we will divide the libraries into several categories in terms of what user interface they offer, +and discuss how these style of interface will affect performance and flexibility of deep learning programs. 
+The discussion in this article may not be specific to deep learning, but we will keep deep learning applications as our use-cases and goal of optimization.
+
+Symbolic vs Imperative Programs
+-------------------------------
+This is the first section to get started, the first thing we are going to compare is symbolic style programs vs imperative style programs.
+If you are a python or c++ programmer, it is likely you are already familiar with imperative programs.
+Imperative style programs conduct the computation as we run them. Most code you will write in python is imperative,
+for example, the following numpy snippet.
+```python
+import numpy as np
+a = np.ones(10)
+b = np.ones(10) * 2
+c = b * a
+d = c + 1
+```
+When the programs execute to ```c = b * a```, it runs the actual computation. Symbolic programs are a bit different.
+The following snippet is an equivalent symbolic style program you can write to achieve the same goal of calculating ```d```.
+```python
+A = Variable('A')
+B = Variable('B')
+C = B * A
+D = C + Constant(1)
+# compiles the function
+f = compile(D)
+d = f(A=np.ones(10), B=np.ones(10)*2)
+```
+The difference in symbolic programs is when ```C = B * A``` is executed, there is no actual computation happening.
+Instead, these operations generate a computation graph (symbolic graph) that represents the computation it described.
+The following picture gives a computation graph to compute ```D```.
+
+![Comp Graph](img/comp_graph.png)
+
+Most symbolic style programs will contain, either explicitly or implicitly, a ```compile``` step.
+This converts the computation graph into a function that can be called.
+Then the real computation happens at the last step of the code. The major characteristic of symbolic programs
+is the clear separation between the computation graph definition step, and the compile, running step.
+
+Examples of imperative style deep learning libraries include Torch, Chainer, Minerva.
+While the examples of symbolic style deep learning libraries include Theano, CGT.
+The libraries that use configuration files like cxxnet, caffe can also be viewed as symbolic style libraries.
+Where the configuration file content defines the computation graph.
+
+Now you know the two different programming models, let us start to compare them!
+
+### Imperative Programs are More Flexible
+
+This is a general statement that may not apply strictly, but indeed imperative programs are usually more flexible than symbolic programs.
+If you are writing an imperative style program in python, you are writing in python. However, if you are writing a symbolic program,
+it is different. Consider the following imperative program, think how you can translate this into a symbolic program.
+```python
+a = 2
+b = a + 1
+d = np.zeros(10)
+for i in range(d):
+    d += np.zeros(10)
+```
+You will find it is actually not easy, because there is a python for-loop that may not be readily supported by the symbolic API.
+If you are writing a symbolic program in python, you are NOT writing in python.
+Instead, you actually write a domain specific language defined by the symbolic API.
+The symbolic APIs are a more powerful version of DSL that generates the computation graphs or configuration of neural nets.
+In that sense, the config-file input libraries are all symbolic.
+
+Because imperative programs are actually more ```native``` than the symbolic ones, it is easier to use native language features
+and inject them into computation flow. Such as printing out the values in the middle of computation, and use conditioning and loop in host language.
+
+### Symbolic Programs are More Efficient
+
+As we can see from the discussion in previous section, imperative programs are usually more flexible and native to the host language.
+Why did a larger portion of deep learning libraries choose to be symbolic instead? The main reason is efficiency, both in terms of memory and runtime.
+Let us consider the same toy example used in the beginning of this section. + +```python +import numpy as np +a = np.ones(10) +b = np.ones(10) * 2 +c = b * a +d = c + 1 +... +``` + +![Comp Graph](img/comp_graph.png) + +Assume each cell in the array cost 8 bytes. How many memory do we need to cost if we are going to execute the above program in python console? +Let us do some math, we need memory for 4 arrays of size 10, that means we will need ```4 * 10 * 8 = 320``` bytes. On the other hand, +to execute the computation graph, we can re-use memory of C and D, to do the last computation in-place, this will give us ```3 * 10 * 8 = 240``` +bytes instead. + +Symbolic programs are more ***restricted***. When the user call ```compile``` on D, the user tells the system that only the value of +```D``` is needed. The intermediate values of computation, in our case ```C``` is invisible to the user. +This allows the symbolic programs to safely re-use the memory to do in-place computaion. + +Imperative programs, on the other hand, need to ***be prepared for all possible futures***. If the above programs is executed in a python console, +there is a possibility that any of these variables could be used in the future, this prevents the system to share the memory space of these variables. + +Of course this argument is a bit idealized, since garbage collection can happen in imperative programs when things runs out of scope, and memory could be re-used. +However, the constraint to be "prepared for all possible futures" indeed happens, and limits the optimizations we can do. This holds for non-trival cases such +as gradient calculation, which we will be discussing in next section. + +Another optimization that symbolic programs can do is operation folding. In the above programs, the multiplication and addition can be folded into one operation. +Which is represented in the following graph. This means one GPU kernel will be executed(instead of two) if the computation runs on GPU. 
+This is actually what we will do to hand crafted operations in optimized libraries such as cxxnet, caffe. Doing so will improve the computation efficiency. + +![Comp Graph Folded](img/comp_graph_folded.png) + +We cannot do that in imperative programs. Because the intermediate value can be reference +some point in the future. The reason that such optimization is possible in symbolic programs, is that we get the entire computation graph, and a clear +boundary on which value is needed and which is not. While imperative programs only operates on local operations and do not have such a clear boundary. + +### Case Study on Backprop and AutoDiff + +In this section, we will compare the two programing models on the problem of auto differentiation, or backpropagation. Gradient calculation is actually +the problem that all the deep learning library need to solve. It is possible to do gradient calculation in both imperative and symbolic style. + +Let us start with the imperative programs. The following snippet is a minimum python code that does automatic differentiation on the toy example we discussed. +```python +class array(object) : + """Simple Array object that support autodiff.""" + def __init__(self, value, name=None): + self.value = value + if name: + self.grad = lambda g : {name : g} + + def __add__(self, other): + assert isinstance(other, int) + ret = array(self.value + other) + ret.grad = lambda g : self.grad(g) + return ret + + def __mul__(self, other): + assert isinstance(other, array) + ret = array(self.value * other.value) + def grad(g): + x = self.grad(g * other.value) + x.update(other.grad(g * self.value)) + return x + ret.grad = grad + return ret + +# some examples +a = array(1, 'a') +b = array(2, 'b') +c = b * a +d = c + 1 +print d.value +print d.grad(1) +# Results +# 3 +# {'a': 2, 'b': 1} +``` + +In the above program, each array object contains a grad function(it is actually a closure). 
+When we run ```d.grad```, it recursively invokes the grad function of its inputs, backprops the gradient value back,
+returns the gradient value of each input. This may look a bit complicated. Let us consider the gradient calculation for
+symbolic programs. The program below is an example of doing symbolic gradient calculation of the same task.
+
+```python
+A = Variable('A')
+B = Variable('B')
+C = B * A
+D = C + Constant(1)
+# get gradient node.
+gA, gB = D.grad(wrt=[A, B])
+# compiles the gradient function.
+f = compile([gA, gB])
+grad_a, grad_b = f(A=np.ones(10), B=np.ones(10)*2)
+```
+
+The grad function of D generates a backward computation graph, and returns gradient nodes ```gA, gB```.
+They correspond to the red nodes in the following figure.
+
+![Comp Graph Folded](img/comp_grad_graph.png)
+
+What the imperative program did was actually the same as the symbolic way. It implicitly saves a backward
+computation graph in the grad closure. When we invoked the ```d.grad```, we start from ```g[D]```,
+backtrace the graph to compute the gradient and collect the results back.
+
+So we can find that in fact the gradient calculation in both symbolic and imperative programming follows the same
+pattern. What is the difference between the two then? Again recall the "have to be prepared for all possible futures"
+requirement of imperative programs. If we are making an array library that supports automatic differentiation,
+we have to keep the grad closure along with the computation. This means all the history variables cannot be
+garbage collected because they are referenced by variable ```d``` via function closure.
+Now, what if we only want to compute the value of d, but do not want the gradient value?
+
+In symbolic programming, user declares the need by ```f=compiled([D])``` instead. It also declares the boundary
+of computation, telling the system I only want to compute the forward pass.
As a result, the system can +free the memory of previous results, and share the memory between inputs and outputs. + +Imagine now we are not running this toy example, but doing instead a deep neural net with ```n``` layers. +If we are only running forward pass, but not backward(gradient) pass, we will only need to allocate 2 copies of +temperal space to store values of intermediate layers, instead of ```n``` copies of them. +However because the imperative programs need to be prepared for the possible futures of getting gradient, +the intermediate values have to be stored, which requires ```n``` copies of temporal space. + +As we can see the level of optimization comes with the restrictions of what user can do. The idea of symbolic +programs is ask user to clearly specify the boundary of computation by compile or its equivalence. +While the imperative programs prepares for all possible futures. The symbolic programs get a natural advantage +by seeing more on what user wants and what user do not want. + +Of course we can also enhance the imperative programs to impose restrictions. For example, one solution to above +problem is to introduce a context variable. We can introduce a no gradient context variable, +to switch the gradient calculation off. This brings a bit more restriction into the imperative programs, +in trading for efficiency. + +```python +with context.NoGradient(): + a = array(1, 'a') + b = array(2, 'b') + c = b * a + d = c + 1 +``` + +However, the above example still have many possible futures, which means we cannot do the inplace calculation +to re-use the memory in forward pass(a trick commonly used to reduce GPU memory usage). +The techniques introduced in this section generates explicit backward pass. +On some of the toolkits such as caffe, cxxnet. Backprop is done implicitly on the same graph. +The discussions of this section also applies to these cases as well. 
+ +Most configuration file based libraries such as cxxnet, caffe are designed for one or two generic requirement. +Get the activation of each layer, or get gradient of all the weights. Same problem stays for these libraries, +the more generic operations the library have to support, the less optimization(memory sharing) we can do, based on the same data structure. + +So as you can see the trade-off between restriction and flexibility stays for most cases. + +### Model Checkpoint + +Being able to save a model and load it back later is important for most users. There are different ways to ```save``` your work. +Normally, to save a neural net, we need to save two things, a net configuration about structure of the neural net, and weights of neural net. + +Being able to checkpoint the configuration is a plus for symbolic programs. Because the symbolic construction phase do not contain computation, +we can directly serialize the computation graph, and load it back later, this solves the save configuration problem without introducing an additional layer. + +```python +A = Variable('A') +B = Variable('B') +C = B * A +D = C + Constant(1) +D.save('mygraph') +... +D2 = load('mygraph') +f = compile([D2]) +# more operations +... +``` + +Because imperative programs executes as it describes the computation. We will have to save the code itself as the ```configuration```, or build another +configuration layer on top of the imperative language. + +### Parameter Update + +Most symbolic programs are data flow(computation) graphs. Dataflow graph can be used to descrie computation conveniently. +However, it is not obvious how to use data flow graph to describe parameter updates, because parameter updates introduces mutation, +which is not concept of data flow. What most symbolic programs do is to introduce a special update statement, to update some persistent +states of the programs. 
+ +It is usually easier to write the parameter updates in imperative styles, especially when we need multiple updates that relates to each other. +For symbolic programs, the update statement is also executed as we call them. So in that sense, most existing symbolic deep learning libraries +also falls back to the imperative way to perform the updates, while using the symbolic way to do the gradient calculation. + +### There is no Strict Boundary + +We have made the comparison between two programming styles. Some of the arguments made may not be strictly true, and there is no clear boundaries between +the programing styles. For example, we can make a (JIT)compiler of python to compile imperative python programs, which gives us some of the advantage of global +information hold in the symbolic programs. However, most of the principles holds true in general, and these constraints apply when we are making a deep learning +libraries. + + +Big vs Small Operations +----------------------- +Now we have pass through the battlefield between symbolic and imperative programs. Let us start to talk about the operations supported by deep learning libraries. +Usually there are two types of operations supported by different deep learning libraries. +- The big layer operations such as FullyConnected, BatchNormalize +- The small operations such as elementwise addition, multiplications. +The libraries like cxxnet, caffe support layer level operations. While the libraries like Theano, Minerva support fine grained operations. + +### Smaller Operations can be More Flexible +This is quite natural, in a sense that we can always use smaller operations to compose bigger operations. +For example, the sigmoid unit can be simply be composed by division and exponential. +```python +sigmoid(x) = 1.0 / (1.0 + exp(-x)) +``` +If we have the smaller operations as building blocks, we can express most of the problems we want. +For readers who are more familar with cxxnet, caffe style layers. 
These operations is not different from a layer, except that they are smaller. +```python +SigmoidLayer(x) = EWiseDivisionLayer(1.0, AddScalarLayer(ExpLayer(-x), 1.0)) +``` +So the above expression becomes composition of three layers, with each defines their forward and backward(gradient) function. +This offers us an advantage to build new layers quickly, because we only need to compose these things together. + +### Big Operations are More Efficient +As you can see directly composing up sigmoid layers means we need to have three layers of operation, instead of one. +```python +SigmoidLayer(x) = EWiseDivisionLayer(1.0, AddScalarLayer(ExpLayer(-x), 1.0)) +``` +This will create overhead in terms of computation and memory (which could be optimized, with cost). + +So the libraries like cxxnet, caffe take a different approach. To support more coarse grained operations +such as BatchNormalization, and the SigmoidLayer directly. In each of these layers, the calculation kernel is handcrafted +with one or only some CUDA kernel launches. This brings more efficiency to these implementations. + +### Compilation and Optimization + +Can the small operations be optimized? Of course they can. This comes to the system optimization part of the compilation engine. +There are two types of optimization that can be done on the computation graph +- The memory allocation optimization, to reuse memory of the intermediate computations. +- Operator fusion, to detect subgraph pattern such as the sigmoid and fuse them into a bigger operation kernel. +The memory allocation optimization was actually not restricted to small operations graphs, but can also be applied to bigger operations graph as well. + +However these optimization may not be essential for bigger operation libraries like cxxnet, caffe. As you never find the compilation step in them. 
+Actually there is a ```compilation step```, that basically translate the layers into a fixed forward, backprop execution plan, by running each operation one by one. + +For computation graphs with smaller operations, these optimizations are crucial for performance. Because the operations are small, there are many subgraph patterns +that can be matched. Also because the final generated operations may not be able to enumerated, an explicit recompilation of the kernels is required, as opposed to +the fixed amount of pre-compiled kernels in the big operation libraries. This is the cause of compilation overhead of the symbolic libraries that support small operations. +The requirement of compilation optimization also creates overhead of engineering for the libraries that solely support smaller operations. + +Like in the symbolic vs imperative case. The bigger operation libraries "cheat" by asking user to provide restrictions(to the common layer provided), +so user is actually the one that does the subgraph matching. This removes the compilation overhead to the real brain, which is usually not too bad. + +### Expression Template and Statically Typed Language + +As we can see we always have a need to write small operations and compose them together. +Libraries like caffe use hand-carfted kernels to build up these bigger blocks. Otheriwse user have to compose up smaller operations from python side. + +Actually, there is a third choice, that works pretty well. This is called expression template. Basically, the idea is to use template programming to +generate genric kernels from expression tree at compile time. You can refer to the [Expression Template Tutorial](https://github.com/dmlc/mshadow/blob/master/guide/exp-template/README.md) +for more details. CXXNet is a library that makes extensive use of expression template, this enables much shorter and more readable code, with matched +peformance with hand crafted kernels. 
+ +The difference between expression template and python kernel generation is that the expression evaluation is done at compile time of c++, with a existing type, +so there is no additional runtime overhead. This is also in princpile possible with other statically typed language that support template, +however we have only seen this trick in C++ so far. + +The expression template libraries creates a middle ground between python operations and hand crafted big kernels. To allow C++ users to craft efficient big +operations by composing smaller operations together. Which is also a choice worth considering. + +Mix The Flavors Together +------------------------ +Now we have compared the programming models, now comes the question of which you might want to choose. +Before we doing so, we should emphasize the the comparison made in this article may not necessary have big impact +depending on where the problems are. + +Remember [Amdahl's law](https://en.wikipedia.org/wiki/Amdahl%27s_law), if you are optimizing non performance critical +part of your problem, you won't get much of the performance gain. + +As we can see usually there is a trade-off between efficiency, flexiblity, engineering complexities. +And usually different programming styles fits into different parts of the problems. +For example, imperative programs are more natural for parameter update, and symbolic programs for gradient calculation. + +What this article advocate is to ***mix*** the flavors together. Recall Amdahl's law. Sometimes the part we want to be flexible +are not necessarily performance crucial, and it is OK to be a bit sloppy to support more flexible interfaces. +In machine learning, ensemble of different methods usually work better than a single one. + +If the programming models can be mixed together in a correct way, we could also get better benefit than a single programming model. +We will list some of the possible discussions here. 
+ +### Symbolic and Imperative Programs +There are two ways to mix symbolic and imperative programs. +- Put imperative programs as part of symbolic programs as callbacks. +- Put symbolic programs as part of imperative programs. + +What we usually observe is that it is usually helpful to write parameter updates in an imperative way, +while the gradient calculations can be done more effectively in symbolic programs. + +The mix of programs is actually happening in existing symbolic libraries, because python itself is imperative. +For example, the following programs mixed the symbolic part together with numpy(which is imperative). +```python +A = Variable('A') +B = Variable('B') +C = B * A +D = C + Constant(1) +# compiles the function +f = compile(D) +d = f(A=np.ones(10), B=np.ones(10)*2) +d = d + 1.0 +``` +The idea is that the symbolic graphs are compiled into a function that can be executed imperatively. Whose internal is a blackbox to the user. +This is exactly like writing c++ programs and exposing them to python, which we commonly do. + +However, using numpy as imperative component might be indesirable, as the parameter memory resides on GPU. A better way might be supporting +a GPU compatible imperative library that interacts with symbolic compiled functions, or provide limited amount of updating syntax via +update statement in symbolic programs execution. + +### Small and Big Operations + +Combining small and big operations is also possible, and actually we might have a good reason to do it. Consider applications such as changing +a loss function or adding a few customized layers to an existing structure. What we usually can do is use big operations to compose up the existing +components, and use smaller operations to building up the new parts. + +Recall Amdahl's law, usually these new components may not be the bottleneck of computation. 
As the performance critical part is already optimized by +the bigger operations, it is even OK that we do not optimize these additional small operations at all, or only do a few memory optimization instead +of operation fusion and directly runnig them. + +### Choose your Own Flavors + +As we have compare the flavors of deep learning programs. The goal of this article is to list these choices and compare their tradeoffs. +There may not be a universal solution for all. But you can always choose your flavor, or combines the flavors you like to create +more interesting and intellegient deep learning libraries. + +Contribution to this Note +------------------------- +This note is part of our will to not only open-source system design notes for deep learning libraries. +You are more welcomed to contribute to this Note, by submitting a pull request. From d3129e776144d6db1c60effd6764b0fd3e1e219e Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 26 Sep 2015 22:59:44 -0700 Subject: [PATCH 04/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 55e5b9ac36f4..5f10a9c39d90 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![Hex.pm](https://img.shields.io/hexpm/l/plug.svg)]() MXNet is an open source deep learning framework designed for both *efficiency* and *flexibility*. -It allows you to mix the [flavors]((http://mxnet.readthedocs.org/en/program_model.html) of +It allows you to mix the [flavors](http://mxnet.readthedocs.org/en/latest/program_model.html) of deep learning programs together to maximize the efficiency and your productivity. 
What's New From 7b6bf2a8c873366914920a0f8c2be5e610570bb7 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 26 Sep 2015 23:03:14 -0700 Subject: [PATCH 05/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5f10a9c39d90..41078d872c65 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Documentation Status](https://readthedocs.org/projects/mxnet/badge/?version=latest)](http://mxnet.readthedocs.org/en/latest/) [![Hex.pm](https://img.shields.io/hexpm/l/plug.svg)]() -MXNet is an open source deep learning framework designed for both *efficiency* and *flexibility*. +MXNet is a deep learning framework designed for both *efficiency* and *flexibility*. It allows you to mix the [flavors](http://mxnet.readthedocs.org/en/latest/program_model.html) of deep learning programs together to maximize the efficiency and your productivity. From ec73910941a2267b7590d75fa59cfea7793322eb Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 26 Sep 2015 23:21:58 -0700 Subject: [PATCH 06/13] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 41078d872c65..d19b993792ad 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ deep learning programs together to maximize the efficiency and your productivity What's New ---------- -* [Note on Programming Models for Deep Learning](http://mxnet.readthedocs.org/en/program_model.html) +* [Note on Programming Models for Deep Learning](http://mxnet.readthedocs.org/en/latest/program_model.html) Contents -------- From 676fd47a268691af1c3fa82f57f76da83fd4afa7 Mon Sep 17 00:00:00 2001 From: sneakerkg Date: Sun, 27 Sep 2015 17:21:02 +0800 Subject: [PATCH 07/13] remove python error when path incorrect --- src/io/iter_batchloader.h | 5 +++-- tests/python/unittest/test_io.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/io/iter_batchloader.h b/src/io/iter_batchloader.h index 
00f46ec4f721..1ec28d13d65a 100644 --- a/src/io/iter_batchloader.h +++ b/src/io/iter_batchloader.h @@ -56,8 +56,7 @@ class BatchLoader : public IIterator { std::vector > kwargs_left; // init batch param, it could have similar param with kwargs_left = param_.InitAllowUnknown(kwargs); - // init base iterator - base_->Init(kwargs); + // init object attributes std::vector data_shape_vec; data_shape_vec.push_back(param_.batch_size); for (size_t shape_dim = 0; shape_dim < param_.data_shape.ndim(); ++shape_dim) { @@ -75,6 +74,8 @@ class BatchLoader : public IIterator { label_holder_ = mshadow::NewTensor(label_shape_.get<2>(), 0.0f); out_.data.push_back(TBlob(data_holder_)); out_.data.push_back(TBlob(label_holder_)); + // init base iterator + base_->Init(kwargs); } inline void BeforeFirst(void) { if (param_.round_batch == 0 || num_overflow_ == 0) { diff --git a/tests/python/unittest/test_io.py b/tests/python/unittest/test_io.py index ed9ce358f24a..5ece7bac8023 100644 --- a/tests/python/unittest/test_io.py +++ b/tests/python/unittest/test_io.py @@ -85,6 +85,6 @@ def test_NDArrayIter(): assert(labelcount[i] == 100) if __name__ == "__main__": - test_NumpyIter() + #test_NDArrayIter() #test_MNISTIter() - #test_Cifar10Rec() + test_Cifar10Rec() From c22c8fe6b271131dc3ce75751ce978021b610843 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 27 Sep 2015 11:24:43 -0700 Subject: [PATCH 08/13] [PY] Add more executor functions to make it easy, improves the docs --- doc/python/ndarray.md | 38 +++++--- python/mxnet/__init__.py | 7 +- python/mxnet/callback.py | 18 ++-- python/mxnet/context.py | 3 +- python/mxnet/executor.py | 176 +++++++++++++++++++++++++++------- python/mxnet/kvstore.py | 17 ++-- python/mxnet/metric.py | 12 ++- python/mxnet/model.py | 16 ++-- python/mxnet/ndarray.py | 16 ++-- python/mxnet/symbol.py | 11 +-- python/mxnet/visualization.py | 5 +- 11 files changed, 219 insertions(+), 100 deletions(-) diff --git a/doc/python/ndarray.md b/doc/python/ndarray.md index 
d5cc48ee64db..6c70a938144d 100644 --- a/doc/python/ndarray.md +++ b/doc/python/ndarray.md @@ -8,9 +8,12 @@ Create NDArray Like `numpy`, you could create `mxnet.ndarray` like followings: ```python >>> import mxnet as mx ->>> a = mx.nd.zeros((100, 50)) # all-zero array of dimension 100x50 ->>> b = mx.nd.ones((256, 32, 128, 1)) # all-one array of dimension 256x32x128x1 ->>> c = mx.nd.array([[1, 2, 3], [4, 5, 6]]) # initialize array with contents +>>> # all-zero array of dimension 100x50 +>>> a = mx.nd.zeros((100, 50)) +>>> # all-one array of dimension 256x32x128x1 +>>> b = mx.nd.ones((256, 32, 128, 1)) +>>> # initialize array with contents +>>> c = mx.nd.array([[1, 2, 3], [4, 5, 6]]) ``` NDArray operations @@ -24,9 +27,11 @@ We provide some basic ndarray operations like arithmetic and slice operations. M >>> a.shape (100L, 50L) >>> b = mx.nd.ones((100, 50)) +>>> # c and d will be calculated in parallel here! >>> c = a + b ->>> d = a - b # c and d will be calculated in parallel here! ->>> b += d # inplace operation, b's contents will be modified, but c and d won't be affected. +>>> d = a - b +>>> # inplace operation, b's contents will be modified, but c and d won't be affected. +>>> b += d ``` ### Slice operations @@ -36,8 +41,8 @@ We provide some basic ndarray operations like arithmetic and slice operations. M >>> a[0:10] = 1 # first 10 rows will become 1 ``` -Conversion from/to `numpy.ndarray` and I/O --------------------------------- +Conversion from/to `numpy.ndarray` +---------------------------------- MXNet NDArray supports pretty nature way to convert from/to `mxnet.ndarray` to/from `numpy.ndarray`: ```python >>> import mxnet as mx @@ -50,13 +55,20 @@ MXNet NDArray supports pretty nature way to convert from/to `mxnet.ndarray` to/f array([ 1., 2., 3.], dtype=float32) ``` -We also provide two convenient functions to help save and load file from I/O: +Save Load NDArray +----------------- +You can always use pickle to save and load NDArrays. 
+We also provide functions to help save and load list or dictionary of NDArrays from file systems. ```python >>> import mxnet as mx >>> a = mx.nd.zeros((100, 200)) ->>> mx.nd.save("/path/to/array/file", a) ->>> mx.nd.save("s3://path/to/s3/array", a) ->>> mx.nd.save("hdfs://path/to/hdfs/array", a) +>>> b = mx.nd.zeros((100, 200)) +>>> # save list of NDArrays +>>> mx.nd.save("/path/to/array/file", [a, b]) +>>> # save dictionary of NDArrays to AWS S3 +>>> mx.nd.save("s3://path/to/s3/array", {'A' : a, 'B' : b}) +>>> # save list of NDArrays to hdfs. +>>> mx.nd.save("hdfs://path/to/hdfs/array", [a, b]) >>> from_file = mx.nd.load("/path/to/array/file") >>> from_s3 = mx.nd.load("s3://path/to/s3/array") >>> from_hdfs = mx.nd.load("hdfs://path/to/hdfs/array") @@ -65,8 +77,8 @@ The good thing about using the above `save` and `load` interface is that: - You could use the format across all `mxnet` language bindings. - Already support S3 and HDFS. -Multi-device support -------------------- +Multi-device Support +-------------------- The device information is stored in `mxnet.Context` structure. When creating ndarray in mxnet, user could either use the context argument (default is CPU context) to create arrays on specific device or use the `with` statement as follows: ```python >>> import mxnet as mx diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index b87b9dad924c..e2da1e1b57a0 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -1,11 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -"""MXNet: a concise, fast and flexible framework for deep learning - -MXNet is a project that evolves from cxxnet, minerva and purine2. -The interface is designed in collaboration by authors of three projects. - -""" +"""MXNet: a concise, fast and flexible framework for deep learning. 
""" from __future__ import absolute_import from .context import Context, current_context, cpu, gpu diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py index 4d121adf7670..03fcb5a85071 100644 --- a/python/mxnet/callback.py +++ b/python/mxnet/callback.py @@ -1,5 +1,7 @@ -# pylint: disable=logging-not-lazy, blacklisted-name, invalid-name -"""model helper for knowing training status""" +# coding: utf-8 +"""Callback functions that can be used to track various status during iteration.""" +from __future__ import absolute_import + import sys import math import logging @@ -19,11 +21,12 @@ def do_checkpoint(prefix): callback : function The callback function that can be passed as iter_end_callback to fit. """ - def _callback(iter_no, s, arg, aux): + def _callback(iter_no, sym, arg, aux): """The checkpoint function.""" - save_checkpoint(prefix, iter_no + 1, s, arg, aux) + save_checkpoint(prefix, iter_no + 1, sym, arg, aux) return _callback + class Speedometer(object): """Calculate training speed in frequent @@ -57,12 +60,13 @@ def __call__(self, count): if self.init: if count % self.frequent == 0: speed = self.frequent * self.batch_size / (time.time() - self.tic) - logging.info("Batch [%d]\tSpeed: %.2f samples/sec" % (count, speed)) + logging.info("Batch [%d]\tSpeed: %.2f samples/sec", count, speed) self.tic = time.time() else: self.init = True self.tic = time.time() + class ProgressBar(object): """Show a progress bar @@ -89,7 +93,7 @@ def __call__(self, count): filled_len = int(round(self.bar_len * count / float(self.total))) percents = math.ceil(100.0 * count / float(self.total)) - bar = '=' * filled_len + '-' * (self.bar_len - filled_len) - sys.stdout.write('[%s] %s%s\r' % (bar, percents, '%')) + prog_bar = '=' * filled_len + '-' * (self.bar_len - filled_len) + sys.stdout.write('[%s] %s%s\r' % (prog_bar, percents, '%')) diff --git a/python/mxnet/context.py b/python/mxnet/context.py index 6ed801eaa7f9..1ed3dae5fb23 100644 --- a/python/mxnet/context.py +++ 
b/python/mxnet/context.py @@ -1,5 +1,5 @@ # coding: utf-8 -""" code for context management """ +"""Context management API of mxnet.""" from __future__ import absolute_import class Context(object): @@ -19,7 +19,6 @@ class Context(object): Examples -------- - Switch default context example: >>> # array on cpu >>> cpu_array = mx.md.ones((2, 3)) >>> # switch default context to GPU(2) diff --git a/python/mxnet/executor.py b/python/mxnet/executor.py index 57a1ad1d238c..631204962152 100644 --- a/python/mxnet/executor.py +++ b/python/mxnet/executor.py @@ -1,6 +1,6 @@ # coding: utf-8 -# pylint: disable=invalid-name, protected-access, too-many-locals, fixme -""" code for executor. """ +# pylint: disable=invalid-name, protected-access, too-many-locals +"""Symbolic Executor component of MXNet.""" from __future__ import absolute_import import ctypes @@ -11,13 +11,17 @@ class Executor(object): """ Executor is the actual executing object of MXNet.""" - def __init__(self, handle): - """Init an executor from handle + def __init__(self, handle, symbol): + """Constructor, used Symbol.bind and Symbol.simple_bind instead. Parameters ---------- handle: ExecutorHandle ExecutorHandle generated by calling Bind + + See Also + -------- + Symbol.bind : to create executor """ if not isinstance(handle, ExecutorHandle): raise TypeError("Handle type error") @@ -26,41 +30,162 @@ def __init__(self, handle): self.grad_arrays = [] self.aux_arrays = [] self.outputs = self._get_outputs() + self._symbol = symbol + self._arg_dict = None + self._grad_dict = None + self._aux_dict = None + + @staticmethod + def _get_dict(names, ndarrays): + """Get the dictionary given name and ndarray pairs.""" + nset = set() + for nm in names: + if nm in nset: + raise ValueError('Duplicate names detected, %s' % str(names)) + nset.add(nm) + return dict(zip(names, ndarrays)) + + def _get_outputs(self): + """list all the output ndarray - def forward(self, is_train=True): - """Do forward. 
+ Returns + ------- + A list of ndarray binded to the heads of executor. + """ + out_size = mx_uint() + handles = ctypes.POINTER(NDArrayHandle)() + check_call(_LIB.MXExecutorOutputs(self.handle, + ctypes.byref(out_size), ctypes.byref(handles))) + return [NDArray(NDArrayHandle(handles[i])) for i in range(out_size.value)] + + def forward(self, is_train=False, **kwargs): + """Calculate the outputs specified by the binded symbol. Parameters ---------- - is_train: bool - whether this forward is for evaluation purpose + is_train: bool, optional + whether this forward is for evaluation purpose. + + **kwargs + Additional specification of input arguments. + + Examples + -------- + >>> # doing forward by specifying data + >>> texec.forward(is_train=True, data=mydata) + >>> # doing forward by not specifying things, but copy to the executor before hand + >>> mydata.copyto(texec.arg_dict['data']) + >>> texec.forward(is_train=True) """ + if len(kwargs) != 0: + arg_dict = self.arg_dict + for name, array in kwargs.items(): + if not isinstance(array, NDArray): + raise ValueError('only accept keyword argument of NDArrays') + if name not in arg_dict: + raise TypeError('Unknown argument %s' % name) + array.copyto(arg_dict[name]) + check_call(_LIB.MXExecutorForward( self.handle, ctypes.c_int(int(is_train)))) - def backward(self, head_grads=None): - """Do backward on heads' gradient. + def backward(self, out_grads=None): + """Do backward pass to get the gradient of arguments. Parameters ---------- - head_grads : NDArray or list of NDArray, optional - Gradient on the heads + out_grads : NDArray or list of NDArray, optional + Gradient on the outputs to be propagated back. + This parameter is only needed when bind is called + on outputs that are not a loss function. 
""" - if head_grads is None: - head_grads = [] - elif isinstance(head_grads, NDArray): - head_grads = [head_grads] + if out_grads is None: + out_grads = [] + elif isinstance(out_grads, NDArray): + out_grads = [out_grads] - for obj in head_grads: + for obj in out_grads: if not isinstance(obj, NDArray): raise TypeError("inputs must be NDArray") - ndarray = c_array(NDArrayHandle, [item.handle for item in head_grads]) + ndarray = c_array(NDArrayHandle, [item.handle for item in out_grads]) check_call(_LIB.MXExecutorBackward( self.handle, - mx_uint(len(head_grads)), + mx_uint(len(out_grads)), ndarray)) + @property + def arg_dict(self): + """Get dictionary representation of argument arrrays. + + Returns + ------- + arg_dict : dict of str to NDArray + The dictionary that maps name of arguments to NDArrays. + + Raises + ------ + ValueError : if there are duplicated names in the arguments. + """ + if self._arg_dict is None: + self._arg_dict = Executor._get_dict( + self._symbol.list_arguments(), self.arg_arrays) + return self._arg_dict + + @property + def aux_dict(self): + """Get dictionary representation of auxiliary states arrays. + + Returns + ------- + aux_dict : dict of str to NDArray + The dictionary that maps name of auxiliary states to NDArrays. + + Raises + ------ + ValueError : if there are duplicated names in the auxiliary states. + """ + if self._aux_dict is None: + self._aux_dict = Executor._get_dict( + self._symbol.list_auxiliary_states(), self.aux_arrays) + return self._aux_dict + + def copy_params_from(self, arg_params, aux_params=None, allow_extra_params=False): + """Copy parameters from arg_params, aux_params into executor's internal array. + + Parameters + ---------- + arg_params : dict of str to NDArray + Parameters, dict of name to NDArray of arguments + + aux_params : dict of str to NDArray, optional + Parameters, dict of name to NDArray of auxiliary states. 
+ + allow_extra_params : boolean, optional + Whether allow extra parameters that are not needed by symbol + If this is True, no error will be thrown when arg_params or aux_params + contain extra parameters that is not needed by the executor. + + Raises + ------ + ValueError + If there is additional parameters in the dict but allow_extra_params=False + """ + for name, array in arg_params.items(): + if name in self.arg_dict: + array.copyto(self.arg_dict[name]) + else: + if not allow_extra_params: + raise ValueError('Find name \"%s\" that is not in the arguments' % name) + if aux_params is None: + aux_params = {} + for name, array in aux_params.items(): + if name in self.aux_dict: + array.copyto(self.aux_dict[name]) + else: + if not allow_extra_params: + raise ValueError('Find name %s that is not in the auxiliary states' % name) + def debug_str(self): """Get a debug string about internal execution plan. @@ -73,16 +198,3 @@ def debug_str(self): check_call(_LIB.MXExecutorPrint( self.handle, ctypes.byref(debug_str))) return py_str(debug_str.value) - - def _get_outputs(self): - """list all heads' output ndarray - - Returns - ------- - A list of ndarray binded to the heads of executor. 
- """ - out_size = mx_uint() - handles = ctypes.POINTER(NDArrayHandle)() - check_call(_LIB.MXExecutorOutputs(self.handle, - ctypes.byref(out_size), ctypes.byref(handles))) - return [NDArray(NDArrayHandle(handles[i])) for i in range(out_size.value)] diff --git a/python/mxnet/kvstore.py b/python/mxnet/kvstore.py index 3d26c3cf4e50..44be43892824 100644 --- a/python/mxnet/kvstore.py +++ b/python/mxnet/kvstore.py @@ -1,6 +1,5 @@ # coding: utf-8 -# pylint: disable=invalid-name, global-statement -""" KVStore in mxnet """ +""" Key value store interface of MXNet for parameter synchronization.""" from __future__ import absolute_import import ctypes @@ -19,10 +18,10 @@ def _ctype_key_value(keys, vals): return (c_array(ctypes.c_int, [keys]), c_array(NDArrayHandle, [vals.handle])) else: - for v in vals: - assert(isinstance(v, NDArray)) + for value in vals: + assert(isinstance(value, NDArray)) return (c_array(ctypes.c_int, [keys] * len(vals)), - c_array(NDArrayHandle, [v.handle for v in vals])) + c_array(NDArrayHandle, [value.handle for value in vals])) else: assert(len(keys) == len(vals)) for k in keys: @@ -66,7 +65,7 @@ def __del__(self): def init(self, key, value): """ Initialize a single or a sequence of key-value pairs into the store. - For each key, one must init it before push and pull + For each key, one must init it before push and pull. Parameters ---------- @@ -102,8 +101,10 @@ def push(self, key, value, priority=0): ---------- key : int or list of int Keys + value : NDArray or list of NDArray or list of list of NDArray According values + priority : int, optional The priority of the push operation. The higher the priority, the faster this action is likely @@ -150,14 +151,16 @@ def push(self, key, value, priority=0): ctypes.c_int(priority))) def pull(self, key, out=None, priority=0): - """ Pull a single value or a sequence of values from the store + """ Pull a single value or a sequence of values from the store. 
Parameters ---------- key : int or list of int Keys + out: NDArray or list of NDArray or list of list of NDArray According values + priority : int, optional The priority of the push operation. The higher the priority, the faster this action is likely diff --git a/python/mxnet/metric.py b/python/mxnet/metric.py index e53a46770464..98c2861fb149 100644 --- a/python/mxnet/metric.py +++ b/python/mxnet/metric.py @@ -1,5 +1,7 @@ -# pylint: disable=invalid-name +# coding: utf-8 """Online evaluation metric module.""" +from __future__ import absolute_import + from .base import string_types import numpy @@ -48,8 +50,8 @@ def __init__(self): def update(self, label, pred): pred = pred.asnumpy() label = label.asnumpy().astype('int32') - py = numpy.argmax(pred, axis=1) - self.sum_metric += numpy.sum(py == label) + pred_label = numpy.argmax(pred, axis=1) + self.sum_metric += numpy.sum(pred_label == label) self.num_inst += label.size @@ -76,7 +78,7 @@ def update(self, label, pred): self.sum_metric += self._feval(label, pred) self.num_inst += 1 - +# pylint: disable=invalid-name def np(numpy_feval, name=None): """Create a customized metric from numpy function. @@ -93,7 +95,7 @@ def feval(label, pred): return numpy_feval(label.asnumpy(), pred.asnumpy()) feval.__name__ = numpy_feval.__name__ return CustomMetric(feval, name) - +# pylint: enable=invalid-name def create(metric): """Create an evaluation metric. 
diff --git a/python/mxnet/model.py b/python/mxnet/model.py index 270b462af987..761fb44857af 100644 --- a/python/mxnet/model.py +++ b/python/mxnet/model.py @@ -1,5 +1,5 @@ # pylint: disable=fixme, invalid-name, too-many-arguments, too-many-locals -# pylint: disable=too-many-branches, too-many-statements, unused-argument +# pylint: disable=too-many-branches, too-many-statements """MXNet model module""" from __future__ import absolute_import @@ -200,14 +200,10 @@ def _train_multi_device(symbol, ctx, input_shape, aux_blocks = [ [x.aux_arrays[index] for x in train_execs] for index in range(len(train_execs[0].aux_arrays))] - for name, block in zip(arg_names, arg_blocks): - if name in arg_params: - for w in block: - arg_params[name].copyto(w) - for name, block in zip(aux_names, aux_blocks): - if name in aux_params: - for w in block: - aux_params[name].copyto(w) + + for texec in train_execs: + texec.copy_params_from(arg_params, aux_params) + # ky value store kv = kvstore.create() if num_device != 1 else None opt_state_blocks = [] @@ -246,7 +242,7 @@ def _train_multi_device(symbol, ctx, input_shape, data[islice].copyto(target) # forward backward pass for texec, islice in zip(train_execs, slices): - texec.forward() + texec.forward(is_train=True) texec.outputs[0].copyto(out_cpu_array[islice]) for texec in train_execs: texec.backward() diff --git a/python/mxnet/ndarray.py b/python/mxnet/ndarray.py index 5418047ee27f..cd6c00e2f6e0 100644 --- a/python/mxnet/ndarray.py +++ b/python/mxnet/ndarray.py @@ -1,5 +1,5 @@ # coding: utf-8 -"""NDArray interface of mxnet""" +"""NDArray API of mxnet.""" from __future__ import absolute_import import ctypes @@ -438,9 +438,10 @@ def load(fname): fname : str The name of the file.Can be S3 or HDFS address (remember built with S3 support). 
Example of fname: - - s3://my-bucket/path/my-s3-ndarray - - hdfs://my-bucket/path/my-hdfs-ndarray - - /path-to/my-local-ndarray + + - `s3://my-bucket/path/my-s3-ndarray` + - `hdfs://my-bucket/path/my-hdfs-ndarray` + - `/path-to/my-local-ndarray` Returns ------- @@ -479,9 +480,10 @@ def save(fname, data): fname : str The name of the file.Can be S3 or HDFS address (remember built with S3 support). Example of fname: - - s3://my-bucket/path/my-s3-ndarray - - hdfs://my-bucket/path/my-hdfs-ndarray - - /path-to/my-local-ndarray + + - `s3://my-bucket/path/my-s3-ndarray` + - `hdfs://my-bucket/path/my-hdfs-ndarray` + - `/path-to/my-local-ndarray` data : list of NDArray or dict of str to NDArray The data to be saved. diff --git a/python/mxnet/symbol.py b/python/mxnet/symbol.py index e8b8af78fe3b..a52dda2fce4d 100644 --- a/python/mxnet/symbol.py +++ b/python/mxnet/symbol.py @@ -1,10 +1,6 @@ # coding: utf-8 -# pylint: disable=invalid-name, protected-access, fixme, too-many-arguments -"""Symbolic support of mxnet. 
- -Symbolic API of MXNet - -""" +# pylint: disable=invalid-name, protected-access, too-many-arguments +"""Symbolic configuration API of mxnet.""" from __future__ import absolute_import import ctypes @@ -571,8 +567,7 @@ def bind(self, ctx, args, args_grad=None, grad_req='write', aux_states=None): mx_uint(len(aux_states)), aux_args_handle, ctypes.byref(handle))) - executor = Executor(handle) - + executor = Executor(handle, self) executor.arg_arrays = args executor.grad_arrays = args_grad executor.aux_arrays = aux_states diff --git a/python/mxnet/visualization.py b/python/mxnet/visualization.py index 686f8cca3554..54f6c924ecdf 100644 --- a/python/mxnet/visualization.py +++ b/python/mxnet/visualization.py @@ -1,7 +1,6 @@ # coding: utf-8 -# pylint: disable=invalid-name, protected-access, too-many-locals, fixme -# pylint: disable=unused-argument, too-many-branches, too-many-statements -# pylint: disable=unused-variable +# pylint: disable=invalid-name, too-many-locals, fixme +# pylint: disable=too-many-branches, too-many-statements """Visualization module""" from __future__ import absolute_import From dc505866b0270e267330c3b42093fcd3dc3ded2b Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 27 Sep 2015 12:28:40 -0700 Subject: [PATCH 09/13] Minor updates on doc --- doc/python/tutorial.md | 33 +++++++++++++++++++++++++-------- python/mxnet/__init__.py | 1 + python/mxnet/ndarray.py | 2 ++ 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/doc/python/tutorial.md b/doc/python/tutorial.md index fb818f7c5071..14d92c2bb26a 100644 --- a/doc/python/tutorial.md +++ b/doc/python/tutorial.md @@ -315,17 +315,34 @@ shape inconsistency. ### Bind the Symbols and Run -Now we can bind the free variables of the symbol and perform forward and -backward. +Now we can bind the free variables of the symbol and perform forward and backward. +The bind function will create a ```Executor``` that can be used to carry out the real computations. 
```python ->>> in_shape = (128, 3, 100, 100) # minibatch_size, #channel, image_width, image_height ->>> executor = net.simple_bind(mx.gpu(), data = mx.nd.empty(in_shape, mx.gpu()) ->>> # feed data and label.. ->>> executor.forward() ->>> executor.backward() ->>> print executor.outputs[0].asnumpy() +>>> # define computation graphs +>>> A = mx.symbol.Variable('A') +>>> B = mx.symbol.Variable('B') +>>> C = A * B +>>> a = mx.nd.ones(3) * 4 +>>> b = mx.nd.ones(3) * 2 +>>> # bind the symbol with real arguments +>>> c_exec = C.bind(ctx=mx.cpu(), args={'A' : a, 'B': b}) +>>> # do forward pass calclation. +>>> c_exec.forward() +>>> c_exec.outputs[0].asnumpy() +[ 8. 8. 8.] ``` +For neural nets, a more commonly used pattern is ```simple_bind```, which will create +all the arguments arrays for you. Then you can call forward, and backward(if gradient is needed) +to get the gradient. +```python +>>> # define computation graphs +>>> net = some symbol +>>> texec = net.simple_bind(data=input_shape) +>>> texec.forward() +>>> texec.backward() +``` +The [model API](../../python/mxnet/model.py) is a thin wrapper around the symbolic executors to support neural net training. ### How Efficient is Symbolic API diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index e2da1e1b57a0..5792df7c4039 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -9,6 +9,7 @@ from . import ndarray from . import name from . import symbol +# use mx.kv as short for kvstore from . import kvstore as kv from . import io # use mx.nd as short for mx.ndarray diff --git a/python/mxnet/ndarray.py b/python/mxnet/ndarray.py index cd6c00e2f6e0..642d834cf7dc 100644 --- a/python/mxnet/ndarray.py +++ b/python/mxnet/ndarray.py @@ -355,6 +355,8 @@ def empty(shape, ctx=None): out: Array The created NDArray. 
""" + if isinstance(shape, int): + shape = (shape, ) if ctx is None: ctx = Context.default_ctx return NDArray(handle=_new_alloc_handle(shape, ctx, False)) From 49d419e94b98622df398f5a0c887dc8018ed7a3e Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 27 Sep 2015 15:53:15 -0600 Subject: [PATCH 10/13] Update README.md --- example/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/example/README.md b/example/README.md index 0765885349f8..8564e1e82d75 100644 --- a/example/README.md +++ b/example/README.md @@ -4,10 +4,10 @@ This folder contains examples of MXNet. Notebooks -------- -* [composite symbol](composite_symbol.ipynb) gives you a demo of how to composite a symbolic Inception-BatchNorm Network -* [cifar-10 recipe](cifar-recipe.ipynb) gives you a step by step demo of how to use MXNet -* [cifar-100](cifar-100.ipynb) gives you a demo of how to train a 75.68% accuracy CIFAR-100 model -* [predict with pretained model](predict-with-pretrained-model.ipynb) gives you a demo of use a pretrained Inception-BN Network +* [composite symbol](notebooks/composite_symbol.ipynb) gives you a demo of how to composite a symbolic Inception-BatchNorm Network +* [cifar-10 recipe](notebooks/cifar-recipe.ipynb) gives you a step by step demo of how to use MXNet +* [cifar-100](notebooks/cifar-100.ipynb) gives you a demo of how to train a 75.68% accuracy CIFAR-100 model +* [predict with pretained model](notebooks/predict-with-pretrained-model.ipynb) gives you a demo of use a pretrained Inception-BN Network Contents From cf13244303a480b7accf8ad69d89386533ede208 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 27 Sep 2015 20:10:53 -0400 Subject: [PATCH 11/13] [MODEL] Support update on kvstore --- example/imagenet/alexnet.py | 6 ++-- example/imagenet/data.py | 8 ++--- python/mxnet/model.py | 59 ++++++++++++++++++++++++++-------- tests/python/train/test_mlp.py | 3 +- 4 files changed, 54 insertions(+), 22 deletions(-) diff --git a/example/imagenet/alexnet.py 
b/example/imagenet/alexnet.py index e4e1663406c4..9a74631a2174 100644 --- a/example/imagenet/alexnet.py +++ b/example/imagenet/alexnet.py @@ -16,7 +16,7 @@ conv2 = mx.symbol.Convolution( data=lrn1, kernel=(5, 5), pad=(2, 2), num_filter=256) relu2 = mx.symbol.Activation(data=conv2, act_type="relu") -pool2 = mx.symbol.Pooling(data=relu2, kernel=(3, 3), stride=(2, 2)) +pool2 = mx.symbol.Pooling(data=relu2, kernel=(3, 3), stride=(2, 2), pool_type="max") lrn2 = mx.symbol.LRN(data=pool2, alpha=0.0001, beta=0.75, knorm=1, nsize=5) # stage 3 conv3 = mx.symbol.Convolution( @@ -28,7 +28,7 @@ conv5 = mx.symbol.Convolution( data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=256) relu5 = mx.symbol.Activation(data=conv5, act_type="relu") -pool3 = mx.symbol.Pooling(data=relu5, kernel=(3, 3), stride=(2, 2)) +pool3 = mx.symbol.Pooling(data=relu5, kernel=(3, 3), stride=(2, 2), pool_type="max") # stage 4 flatten = mx.symbol.Flatten(data=pool3) fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=4096) @@ -48,7 +48,7 @@ train, val = ilsvrc12_iterator(batch_size=batch_size, input_shape=(3,224,224)) ## train -num_gpus = 2 +num_gpus = 4 gpus = [mx.gpu(i) for i in range(num_gpus)] model = mx.model.FeedForward( ctx = gpus, diff --git a/example/imagenet/data.py b/example/imagenet/data.py index cfca1db5e084..2f53902b3c96 100644 --- a/example/imagenet/data.py +++ b/example/imagenet/data.py @@ -7,8 +7,8 @@ def ilsvrc12_iterator(batch_size, input_shape): """return train and val iterators for imagenet""" train_dataiter = mx.io.ImageRecordIter( - path_imgrec = "data/ilsvrc12/train.rec", - mean_img = "data/ilsvrc12/mean.bin", + path_imgrec = "data/train.rec", + mean_img = "data/mean.bin", rand_crop = True, rand_mirror = True, prefetch_buffer = 4, @@ -16,8 +16,8 @@ def ilsvrc12_iterator(batch_size, input_shape): data_shape = input_shape, batch_size = batch_size) val_dataiter = mx.io.ImageRecordIter( - path_imgrec = "data/ilsvrc12/val.rec", - mean_img = "data/ilsvrc12/mean.bin", + path_imgrec = 
"data/val.rec", + mean_img = "data/mean.bin", rand_crop = False, rand_mirror = False, prefetch_buffer = 4, diff --git a/python/mxnet/model.py b/python/mxnet/model.py index c6f1665f1524..b0b4f46ccb65 100644 --- a/python/mxnet/model.py +++ b/python/mxnet/model.py @@ -122,6 +122,7 @@ def _train_multi_device(symbol, ctx, input_shape, begin_round, end_round, optimizer, train_data, eval_data=None, eval_metric=None, iter_end_callback=None, epoch_end_callback=None, + update_on_kvstore=False, logger=None): """Internal training function on multiple devices. @@ -172,12 +173,18 @@ def _train_multi_device(symbol, ctx, input_shape, epoch_end_callback: callable(iteration) A callback that is invoked at end of each batch + update_on_kvstore: boolean, optional + Whether to perform parameter update on kvstore instead of training device. + logger : logging logger When not specified, default logger will be used. Notes ----- - This function will inplace update the NDArrays in arg_parans and aux_states. + - This function will inplace update the NDArrays in arg_parans and aux_states. + - Turning update_on_kvstore on and off can affect speed of multi-gpu training. + - update_on_kvstore=True works well for inception type nets that contains many small weights. + - update_on_kvstore=False works better for Alexnet style net with bulk weights. """ if logger is None: logger = logging @@ -203,9 +210,11 @@ def _train_multi_device(symbol, ctx, input_shape, for texec in train_execs: texec.copy_params_from(arg_params, aux_params) - # ky value store kv = kvstore.create() if num_device != 1 else None + if kv is None: + update_on_kvstore = False + opt_state_blocks = [] # If there are multiple devices, initialize the weights. 
for index, pair in enumerate(zip(arg_blocks, grad_blocks)): @@ -214,11 +223,20 @@ def _train_multi_device(symbol, ctx, input_shape, if kv: kv.init(index, arg_list[0]) # attach state direct to weight - opt_list = [optimizer.create_state(index, w) for w in arg_list] - opt_state_blocks.append(opt_list) + if update_on_kvstore: + opt_state_blocks.append(nd.zeros(arg_list[0].shape, cpu())) + else: + opt_list = [optimizer.create_state(index, w) for w in arg_list] + opt_state_blocks.append(opt_list) else: opt_state_blocks.append(None) + def kv_updater(index, grad, weight): + """Internal updater on KVstore, used when update_on_kvstore=True.""" + optimizer.update(index, weight, grad, opt_state_blocks[index]) + if update_on_kvstore: + kv.set_updater(kv_updater) + # Input and output data structure data_index, label_index = _check_arguments(symbol) merged_shape = list(train_execs[0].outputs[0].shape) @@ -255,12 +273,17 @@ def _train_multi_device(symbol, ctx, input_shape, if kv: # push gradient, priority is negative index kv.push(index, grad_list, priority=-index) - # pull back the sum, to the same locations. - kv.pull(index, grad_list, priority=-index) - opt_list = opt_state_blocks[index] - # optimizea - for w, g, state in zip(arg_list, grad_list, opt_list): - optimizer.update(index, w, g, state) + if update_on_kvstore: + # pull back the weights + kv.pull(index, arg_list, priority=-index) + else: + # pull back the sum gradients, to the same locations. 
+ kv.pull(index, grad_list, priority=-index) + if not update_on_kvstore: + opt_list = opt_state_blocks[index] + # optimizea + for w, g, state in zip(arg_list, grad_list, opt_list): + optimizer.update(index, w, g, state) nbatch += 1 # epoch callback (for print purpose) if epoch_end_callback != None: @@ -562,7 +585,8 @@ def predict(self, X): return np.concatenate(outputs) def fit(self, X, y=None, eval_data=None, eval_metric='acc', - iter_end_callback=None, epoch_end_callback=None, logger=None): + iter_end_callback=None, epoch_end_callback=None, + update_on_kvstore=False, logger=None): """Fit the model. Parameters @@ -592,6 +616,9 @@ def fit(self, X, y=None, eval_data=None, eval_metric='acc', A callback that is invoked at end of each batch For print purpose + update_on_kvstore: boolean, optional + Whether to perform parameter update on kvstore instead of training device. + logger : logging logger, optional When not specified, default logger will be used. """ @@ -622,7 +649,7 @@ def fit(self, X, y=None, eval_data=None, eval_metric='acc', eval_metric=eval_metric, iter_end_callback=iter_end_callback, epoch_end_callback=epoch_end_callback, - logger=logger) + update_on_kvstore=update_on_kvstore, logger=logger) def save(self, prefix, iteration=None): """Checkpoint the model checkpoint into file. @@ -684,7 +711,7 @@ def load(prefix, iteration, ctx=None): def create(symbol, X, y=None, ctx=None, num_round=None, optimizer='sgd', initializer=Xavier(), eval_data=None, eval_metric='acc', iter_end_callback=None, - logger=None, **kwargs): + update_on_kvstore=False, logger=None, **kwargs): """Functional style to create a model. This function will be more consistent with functional @@ -726,10 +753,14 @@ def create(symbol, X, y=None, ctx=None, A callback that is invoked at end of each iteration. This can be used to checkpoint model each iteration. + update_on_kvstore: boolean, optional + Whether to perform parameter update on kvstore instead of training device. 
+ logger : logging logger, optional """ model = FeedForward(symbol, ctx=ctx, num_round=num_round, optimizer=optimizer, initializer=initializer, **kwargs) model.fit(X, y, eval_data=eval_data, eval_metric=eval_metric, - iter_end_callback=iter_end_callback, logger=logger) + iter_end_callback=iter_end_callback, + update_on_kvstore=update_on_kvstore, logger=logger) return model diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py index 14e8f6c700a8..fdf89142fc11 100644 --- a/tests/python/train/test_mlp.py +++ b/tests/python/train/test_mlp.py @@ -53,7 +53,8 @@ def test_mlp(): ctx=[mx.cpu(i) for i in range(2)], num_round=num_round, learning_rate=0.01, wd=0.0004, - momentum=0.9) + momentum=0.9, + update_on_kvstore=True) logging.info('Finish traning...') prob = model.predict(val_dataiter) From 06ad14d1a9f6651a3da8dc0508ee55e6dd3b9105 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 27 Sep 2015 17:57:23 -0700 Subject: [PATCH 12/13] [STORAGE] Change storage to pure interface, move lock to pooled manager --- include/mxnet/storage.h | 15 +--- src/storage/pooled_storage_manager.h | 11 ++- src/storage/storage.cc | 110 +++++++++++++-------------- 3 files changed, 63 insertions(+), 73 deletions(-) diff --git a/include/mxnet/storage.h b/include/mxnet/storage.h index 71d303ff01f3..da7a8aaa5388 100644 --- a/include/mxnet/storage.h +++ b/include/mxnet/storage.h @@ -39,16 +39,16 @@ class Storage { * \param ctx Context information about the device and ID. * \return Handle struct. */ - Handle Alloc(size_t size, Context ctx); + virtual Handle Alloc(size_t size, Context ctx) = 0; /*! * \brief Free storage. * \param handle Handle struect. */ - void Free(Handle handle); + virtual void Free(Handle handle) = 0; /*! * \brief Destructor. */ - ~Storage(); + virtual ~Storage() {} /*! * \return Storage singleton. */ @@ -62,15 +62,6 @@ class Storage { * \return A shared pointer to Storage singleton. */ static std::shared_ptr _GetSharedRef(); - - private: - /*! 
- * \brief Hidden constructors. - */ - Storage(); - struct Impl; - std::unique_ptr impl_; - DISALLOW_COPY_AND_ASSIGN(Storage); }; // class Storage } // namespace mxnet #endif // MXNET_STORAGE_H_ diff --git a/src/storage/pooled_storage_manager.h b/src/storage/pooled_storage_manager.h index 92cbc55951a3..7d3c0dcb7802 100644 --- a/src/storage/pooled_storage_manager.h +++ b/src/storage/pooled_storage_manager.h @@ -6,10 +6,11 @@ #ifndef MXNET_STORAGE_POOLED_STORAGE_MANAGER_H_ #define MXNET_STORAGE_POOLED_STORAGE_MANAGER_H_ +#include #include #include -#include "storage_manager.h" -#include "mxnet/base.h" +#include +#include "./storage_manager.h" namespace mxnet { namespace storage { @@ -35,13 +36,18 @@ class PooledStorageManager final : public StorageManager { private: void ReleaseAll(); + // internal mutex + std::mutex mutex_; + // used memory size_t used_memory_ = 0; + // memory pool std::unordered_map> memory_pool_; DISALLOW_COPY_AND_ASSIGN(PooledStorageManager); }; // class PooledStorageManager template void* PooledStorageManager::Alloc(size_t size) { + std::lock_guard lock(mutex_); auto&& reuse_it = memory_pool_.find(size); if (reuse_it == memory_pool_.end() || reuse_it->second.size() == 0) { if (kThreshold <= used_memory_) { @@ -60,6 +66,7 @@ void* PooledStorageManager::Alloc(size_t size) { template void PooledStorageManager::Free(void* ptr, size_t size) { + std::lock_guard lock(mutex_); auto&& reuse_pool = memory_pool_[size]; reuse_pool.push_back(ptr); } diff --git a/src/storage/storage.cc b/src/storage/storage.cc index 08af99621b40..4e9c85b71f74 100644 --- a/src/storage/storage.cc +++ b/src/storage/storage.cc @@ -5,21 +5,25 @@ #include #include #include -#include -#include -#include "storage_manager.h" -#include "naive_storage_manager.h" -#include "pooled_storage_manager.h" -#include "cpu_device_storage.h" -#include "gpu_device_storage.h" -#include "pinned_memory_storage.h" +#include "./storage_manager.h" +#include "./naive_storage_manager.h" +#include 
"./pooled_storage_manager.h" +#include "./cpu_device_storage.h" +#include "./gpu_device_storage.h" +#include "./pinned_memory_storage.h" #include "../common/cuda_utils.h" -#include "../common/utils.h" +#include "../common/lazy_alloc_array.h" namespace mxnet { // consider change storage as a pure abstract class -struct Storage::Impl { +class StorageImpl : public Storage { + public: + Handle Alloc(size_t size, Context ctx) override; + void Free(Handle handle) override; + virtual ~StorageImpl() = default; + + private: static constexpr size_t kPoolThreshold = 4096 * 1024 * 1024ul; static constexpr size_t kMaxNumberOfDevices = Context::kMaxDevType + 1; static constexpr size_t kMaxNumberOfDeviceIDs = Context::kMaxDevID + 1; @@ -43,64 +47,56 @@ struct Storage::Impl { LOG(FATAL) << "Unimplemented device"; } } - - std::array, - kMaxNumberOfDeviceIDs>, - kMaxNumberOfDevices> storage_managers; - std::mutex m; + // internal storage managers + std::array, + kMaxNumberOfDevices> storage_managers_; }; // struct Storage::Impl -Storage::Handle Storage::Alloc(size_t size, Context ctx) { +Storage::Handle StorageImpl::Alloc(size_t size, Context ctx) { // space already recycled, ignore request Handle hd; hd.ctx = ctx; hd.size = size; - { - std::lock_guard lock{impl_->m}; - auto&& device = impl_->storage_managers.at(ctx.dev_type); - auto&& device_id_it = device.at(ctx.dev_id); - // Allocate device if necessary. 
- if (!device_id_it) { - switch (ctx.dev_type) { - case Context::kCPU: { - device_id_it = common::MakeUnique< - Storage::Impl::CurrentStorageManager< - storage::CPUDeviceStorage>>(); - break; - } - case Context::kCPUPinned: { - device_id_it = common::MakeUnique< - Storage::Impl::CurrentStorageManager< - storage::PinnedMemoryStorage>>(); - break; + auto&& device = storage_managers_.at(ctx.dev_type); + storage::StorageManager *manager = device.Get( + ctx.dev_id, [ctx]() { + storage::StorageManager *ptr = nullptr; + switch (ctx.dev_type) { + case Context::kCPU: { + ptr = new CurrentStorageManager(); + break; + } + case Context::kCPUPinned: { + ptr = new CurrentStorageManager(); + break; + } + case Context::kGPU: { + ptr = new CurrentStorageManager(); + break; + } + default: LOG(FATAL) << "Unimplemented device"; } - case Context::kGPU: { - device_id_it = common::MakeUnique>(); - break; - } - default: - LOG(FATAL) << "Unimplemented device"; - } - } - Impl::ActivateDevice(ctx); - hd.dptr = device_id_it->Alloc(size); - } + return ptr; + }); + this->ActivateDevice(ctx); + hd.dptr = manager->Alloc(size); return hd; } -void Storage::Free(Storage::Handle handle) { - std::lock_guard lock{impl_->m}; - Impl::ActivateDevice(handle.ctx); - impl_->storage_managers.at(handle.ctx.dev_type) - .at(handle.ctx.dev_id) - ->Free(handle.dptr, handle.size); +void StorageImpl::Free(Storage::Handle handle) { + const Context &ctx = handle.ctx; + auto&& device = storage_managers_.at(ctx.dev_type); + storage::StorageManager *maneger = device.Get( + ctx.dev_id, []() { + LOG(FATAL) << "Cannot Free space to a device you have not allocated"; + return nullptr; + }); + this->ActivateDevice(ctx); + maneger->Free(handle.dptr, handle.size); } -Storage::~Storage() = default; - std::shared_ptr Storage::_GetSharedRef() { - static std::shared_ptr inst(new Storage()); + static std::shared_ptr inst(new StorageImpl()); return inst; } @@ -108,8 +104,4 @@ Storage* Storage::Get() { static Storage *ptr = 
_GetSharedRef().get(); return ptr; } - -Storage::Storage() : impl_{new Impl{}} {} - - } // namespace mxnet From a36338e0cfcd2894b40749ce1c6e2d9620d257ed Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 27 Sep 2015 18:44:04 -0700 Subject: [PATCH 13/13] [MSHADOW] sync to head --- mshadow | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mshadow b/mshadow index 8a117cca5ac7..decce9531b44 160000 --- a/mshadow +++ b/mshadow @@ -1 +1 @@ -Subproject commit 8a117cca5ac72601e8dbedd832b46bf19e0ae544 +Subproject commit decce9531b4493cd348c6fa4436b16cf508d9ce7