[MXNET-13453] [Clojure] - Add Spec Validations to the Optimizer namespace #13499

Merged 1 commit on Dec 2, 2018
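
In practice the change means each optimizer constructor now rejects malformed options up front instead of failing later inside the Scala interop. A quick sketch of the intended behaviour, using only option keys and error messages that appear in this diff:

(require '[org.apache.clojure-mxnet.optimizer :as optimizer])

;; well-typed options pass through to the underlying Scala constructor as before
(optimizer/sgd {:learning-rate 0.01 :momentum 0.9})

;; an option of the wrong type now throws, e.g. a symbol where a float is expected;
;; the exception message is the one handed to util/validate!, here
;; "Incorrect sgd optimizer options"
(optimizer/sgd {:wd 'a})
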
@@ -17,7 +17,19 @@

(ns org.apache.clojure-mxnet.optimizer
(:refer-clojure :exclude [update])
(:import (org.apache.mxnet.optimizer SGD DCASGD NAG AdaDelta RMSProp AdaGrad Adam SGLD)))
(:require
[clojure.spec.alpha :as s]
[org.apache.clojure-mxnet.util :as util])
(:import
(org.apache.mxnet.optimizer SGD DCASGD NAG AdaDelta RMSProp AdaGrad Adam SGLD)
(org.apache.mxnet FactorScheduler)))

(s/def ::learning-rate float?)
(s/def ::momentum float?)
(s/def ::wd float?)
(s/def ::clip-gradient float?)
(s/def ::lr-scheduler #(instance? FactorScheduler %))
(s/def ::sgd-opts (s/keys :opt-un [::learning-rate ::momentum ::wd ::clip-gradient ::lr-scheduler]))

(defn sgd
"A very simple SGD optimizer with momentum and weight regularization."
@@ -26,10 +38,14 @@
momentum 0.0
wd 0.0001
clip-gradient 0}}]
(util/validate! ::sgd-opts opts "Incorrect sgd optimizer options")
(new SGD (float learning-rate) (float momentum) (float wd) (float clip-gradient) lr-scheduler))
([]
(sgd {})))

(s/def ::lambda float?)
(s/def ::dcasgd-opts (s/keys :opt-un [::learning-rate ::momentum ::lambda ::wd ::clip-gradient ::lr-scheduler]))

(defn dcasgd
"DCASGD optimizer with momentum and weight regularization.
Implementation of paper 'Asynchronous Stochastic Gradient Descent with
@@ -40,10 +56,13 @@
lambda 0.04
wd 0.0
clip-gradient 0}}]
(util/validate! ::dcasgd-opts opts "Incorrect dcasgd optimizer options")
(new DCASGD (float learning-rate) (float lambda) (float momentum) (float wd) (float clip-gradient) lr-scheduler))
([]
(dcasgd {})))

(s/def ::nag-opts (s/keys :opt-un [::learning-rate ::momentum ::wd ::clip-gradient ::lr-scheduler]))

(defn nag
"SGD with nesterov.
It is implemented according to
@@ -53,10 +72,16 @@
momentum 0.0
wd 0.0001
clip-gradient 0}}]
(util/validate! ::nag-opts opts "Incorrect nag optimizer options")
(new NAG (float learning-rate) (float momentum) (float wd) (float clip-gradient) lr-scheduler))
([]
(nag {})))

(s/def ::rho float?)
(s/def ::rescale-gradient float?)
(s/def ::epsilon float?)
(s/def ::ada-delta-opts (s/keys :opt-un [::rho ::rescale-gradient ::epsilon ::wd ::clip-gradient]))

(defn ada-delta
"AdaDelta optimizer as described in Matthew D. Zeiler, 2012.
http://arxiv.org/abs/1212.5701"
@@ -66,10 +91,15 @@
epsilon 1e-8
wd 0.0
clip-gradient 0}}]
(util/validate! ::ada-delta-opts opts "Incorrect ada-delta optimizer options")
(new AdaDelta (float rho) (float rescale-gradient) (float epsilon) (float wd) (float clip-gradient)))
([]
(ada-delta {})))

(s/def ::gamma1 float?)
(s/def ::gamma2 float?)
(s/def ::rms-prop-opts (s/keys :opt-un [::learning-rate ::rescale-gradient ::gamma1 ::gamma2 ::wd ::clip-gradient ::lr-scheduler]))

(defn rms-prop
"RMSProp optimizer as described in Tieleman & Hinton, 2012.
http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.
@@ -80,18 +110,21 @@
- wd L2 regularization coefficient added to all the weights
- clip-gradient clip gradient in range [-clip_gradient, clip_gradient]
- lr-scheduler The learning rate scheduler"
([{:keys [learning-rate rescale-gradient gamma1 gamma2 wd lr-scheduler clip-gradient]
([{:keys [learning-rate rescale-gradient gamma1 gamma2 wd lr-scheduler clip-gradient] :as opts
:or {learning-rate 0.002
rescale-gradient 1.0
gamma1 0.95
gamma2 0.9
wd 0.0
clip-gradient 0}}]
(util/validate! ::rms-prop-opts opts "Incorrect rms-prop optimizer options")
(new RMSProp (float learning-rate) (float rescale-gradient) (float gamma1)
(float gamma2) (float wd) lr-scheduler (float clip-gradient)))
([]
(rms-prop {})))

(s/def ::ada-grad-opts (s/keys :opt-un [::learning-rate ::rescale-gradient ::epsilon ::wd]))

(defn ada-grad
" AdaGrad optimizer as described in Duchi, Hazan and Singer, 2011.
http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf
@@ -101,15 +134,20 @@
Default value is set to 1e-7.
- rescale-gradient rescaling factor of gradient.
- wd L2 regularization coefficient added to all the weights
([{:keys [learning-rate rescale-gradient epsilon wd]
([{:keys [learning-rate rescale-gradient epsilon wd] :as opts
:or {learning-rate 0.05
rescale-gradient 1.0
epsilon 1e-7
wd 0.0}}]
(util/validate! ::ada-grad-opts opts "Incorrect ada-grad optimizer options")
(new AdaGrad (float learning-rate) (float rescale-gradient) (float epsilon) (float wd)))
([]
(ada-grad {})))

(s/def ::beta1 float?)
(s/def ::beta2 float?)
(s/def ::decay-factor float?)
(s/def ::adam-opts (s/keys :opt-un [::learning-rate ::beta1 ::beta2 ::epsilon ::decay-factor ::wd ::clip-gradient ::lr-scheduler]))

(defn adam
"Adam optimizer as described in [King2014]

@@ -125,19 +163,22 @@
- wd L2 regularization coefficient added to all the weights
- clip-gradient clip gradient in range [-clip_gradient, clip_gradient]
- lr-scheduler The learning rate scheduler"
([{:keys [learning-rate beta1 beta2 epsilon decay-factor wd clip-gradient lr-scheduler]
([{:keys [learning-rate beta1 beta2 epsilon decay-factor wd clip-gradient lr-scheduler] :as opts
:or {learning-rate 0.002
beta1 0.9
beta2 0.999
epsilon 1e-8
decay-factor (- 1 1e-8)
wd 0
clip-gradient 0}}]
(util/validate! ::adam-opts opts "Incorrect adam optimizer options")
(new Adam (float learning-rate) (float beta1) (float beta2) (float epsilon)
(float decay-factor) (float wd) (float clip-gradient) lr-scheduler))
([]
(adam {})))

(s/def ::sgld-opts (s/keys :opt-un [::learning-rate ::rescale-gradient ::wd ::clip-gradient ::lr-scheduler]))

(defn sgld
"Stochastic Langevin Dynamics Updater to sample from a distribution.

@@ -146,11 +187,12 @@
- wd L2 regularization coefficient added to all the weights
- clip-gradient Float, clip gradient in range [-clip_gradient, clip_gradient]
- lr-scheduler The learning rate scheduler"
([{:keys [learning-rate rescale-gradient wd clip-gradient lr-scheduler]
([{:keys [learning-rate rescale-gradient wd clip-gradient lr-scheduler] :as opts
:or {learning-rate 0.01
rescale-gradient 1
wd 0.0001
clip-gradient 0}}]
(util/validate! ::sgld-opts opts "Incorrect sgld optimizer options")
(new SGLD (float learning-rate) (float rescale-gradient) (float wd)
(float clip-gradient) lr-scheduler))
([]
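
The validation calls above delegate to util/validate!, which lives in the util namespace and is not part of this diff. As a rough sketch only, assuming it simply wraps clojure.spec's s/valid? and s/explain-str and throws when the value does not conform (the real implementation may differ):

;; hypothetical sketch of the helper, not taken from this diff
(defn validate!
  "Throw `error-msg`, augmented with the spec explanation, when `value` fails `spec`."
  [spec value error-msg]
  (when-not (s/valid? spec value)
    (throw (ex-info (str error-msg " " (s/explain-str spec value))
                    {:spec spec :value value}))))

The second file in the diff adds one spec test per optimizer to the optimizer test namespace:
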
@@ -44,3 +44,13 @@
["sgld" optimizer/sgld]]]
(doseq [opt opts]
(test-optimizer opt))))

(deftest test-optimizers-parameters-specs
Member (review comment): Nice tests 👍

(is (thrown? Exception (optimizer/sgd {:wd 'a})))
(is (thrown? Exception (optimizer/dcasgd {:lambda 'a})))
(is (thrown? Exception (optimizer/nag {:momentum 'a})))
(is (thrown? Exception (optimizer/ada-delta {:epsilon 'a})))
(is (thrown? Exception (optimizer/rms-prop {:gamma1 'a})))
(is (thrown? Exception (optimizer/ada-grad {:rescale-gradient 'a})))
(is (thrown? Exception (optimizer/adam {:beta1 'a})))
(is (thrown? Exception (optimizer/sgld {:lr-scheduler 0.1}))))
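
The last assertion is the odd one out: 0.1 satisfies float? but is not a FactorScheduler instance, so it trips the ::lr-scheduler spec rather than a numeric one. For contrast, a hypothetical passing call could look like the sketch below; the two-argument FactorScheduler constructor (step, factor) is an assumption and does not appear in this diff:

;; assumes (:import (org.apache.mxnet FactorScheduler)) and a (step, factor) constructor
(optimizer/sgld {:learning-rate 0.01
                 :lr-scheduler (new FactorScheduler (int 1000) (float 0.9))})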