#13453 [Clojure] - Add Spec Validations to the Optimizer namespace (#…
hellonico authored and gigasquid committed Dec 2, 2018
1 parent d91284b commit b684c65
Showing 2 changed files with 57 additions and 5 deletions.
52 changes: 47 additions & 5 deletions contrib/clojure-package/src/org/apache/clojure_mxnet/optimizer.clj
@@ -17,7 +17,19 @@

(ns org.apache.clojure-mxnet.optimizer
(:refer-clojure :exclude [update])
(:require
[clojure.spec.alpha :as s]
[org.apache.clojure-mxnet.util :as util])
(:import
(org.apache.mxnet.optimizer SGD DCASGD NAG AdaDelta RMSProp AdaGrad Adam SGLD)
(org.apache.mxnet FactorScheduler)))

(s/def ::learning-rate float?)
(s/def ::momentum float?)
(s/def ::wd float?)
(s/def ::clip-gradient float?)
(s/def ::lr-scheduler #(instance? FactorScheduler %))
(s/def ::sgd-opts (s/keys :opt-un [::learning-rate ::momentum ::wd ::clip-gradient ::lr-scheduler]))
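
For reference, these option specs can be exercised directly with clojure.spec at the REPL. A short sketch; the option maps below are illustrative and not taken from this diff:

(s/valid? ::sgd-opts {:learning-rate 0.01 :momentum 0.9})
;; => true
(s/valid? ::sgd-opts {:learning-rate "fast"})
;; => false
(s/explain-str ::sgd-opts {:learning-rate "fast"})
;; => a string explaining that :learning-rate fails the ::learning-rate (float?) spec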

(defn sgd
"A very simple SGD optimizer with momentum and weight regularization."
@@ -26,10 +38,14 @@
momentum 0.0
wd 0.0001
clip-gradient 0}}]
(util/validate! ::sgd-opts opts "Incorrect sgd optimizer options")
(new SGD (float learning-rate) (float momentum) (float wd) (float clip-gradient) lr-scheduler))
([]
(sgd {})))
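
util/validate! comes from org.apache.clojure-mxnet.util and is not shown in this diff. A minimal sketch of what such a helper presumably looks like, assuming it throws when the value does not conform and carries the spec explanation; the real implementation may differ:

(require '[clojure.spec.alpha :as s])

;; Hypothetical helper: throw with the spec explanation when validation fails.
(defn validate! [spec value error-msg]
  (when-not (s/valid? spec value)
    (throw (ex-info (str error-msg " " (s/explain-str spec value))
                    {:spec spec :value value}))))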

(s/def ::lambda float?)
(s/def ::dcasgd-opts (s/keys :opt-un [::learning-rate ::momentum ::lambda ::wd ::clip-gradient ::lr-scheduler]))

(defn dcasgd
"DCASGD optimizer with momentum and weight regularization.
Implementation of paper 'Asynchronous Stochastic Gradient Descent with
@@ -40,10 +56,13 @@
lambda 0.04
wd 0.0
clip-gradient 0}}]
(util/validate! ::dcasgd-opts opts "Incorrect dcasgd optimizer options")
(new DCASGD (float learning-rate) (float lambda) (float momentum) (float wd) (float clip-gradient) lr-scheduler))
([]
(dcasgd {})))

(s/def ::nag-opts (s/keys :opt-un [::learning-rate ::momentum ::wd ::clip-gradient ::lr-scheduler]))

(defn nag
"SGD with Nesterov momentum.
It is implemented according to
@@ -53,10 +72,16 @@
momentum 0.0
wd 0.0001
clip-gradient 0}}]
(util/validate! ::nag-opts opts "Incorrect nag optimizer options")
(new NAG (float learning-rate) (float momentum) (float wd) (float clip-gradient) lr-scheduler))
([]
(nag {})))

(s/def ::rho float?)
(s/def ::rescale-gradient float?)
(s/def ::epsilon float?)
(s/def ::ada-delta-opts (s/keys :opt-un [::rho ::rescale-gradient ::epsilon ::wd ::clip-gradient]))

(defn ada-delta
"AdaDelta optimizer as described in Matthew D. Zeiler, 2012.
http://arxiv.org/abs/1212.5701"
@@ -66,10 +91,15 @@
epsilon 1e-8
wd 0.0
clip-gradient 0}}]
(util/validate! ::ada-delta-opts opts "Incorrect ada-delta optimizer options")
(new AdaDelta (float rho) (float rescale-gradient) (float epsilon) (float wd) (float clip-gradient)))
([]
(ada-delta {})))

(s/def ::gamma1 float?)
(s/def ::gamma2 float?)
(s/def ::rms-prop-opts (s/keys :opt-un [::learning-rate ::rescale-gradient ::gamma1 ::gamma2 ::wd ::clip-gradient]))

(defn rms-prop
"RMSProp optimizer as described in Tieleman & Hinton, 2012.
http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.
@@ -80,18 +110,21 @@
- wd L2 regularization coefficient added to all the weights
- clip-gradient clip gradient in range [-clip_gradient, clip_gradient]
- lr-scheduler The learning rate scheduler"
([{:keys [learning-rate rescale-gradient gamma1 gamma2 wd lr-scheduler clip-gradient] :as opts
:or {learning-rate 0.002
rescale-gradient 1.0
gamma1 0.95
gamma2 0.9
wd 0.0
clip-gradient 0}}]
(util/validate! ::rms-prop-opts opts "Incorrect rms-prop optimizer options")
(new RMSProp (float learning-rate) (float rescale-gradient) (float gamma1)
(float gamma2) (float wd) lr-scheduler (float clip-gradient)))
([]
(rms-prop {})))

(s/def ::ada-grad-opts (s/keys :opt-un [::learning-rate ::rescale-gradient ::epsilon ::wd]))

(defn ada-grad
" AdaGrad optimizer as described in Duchi, Hazan and Singer, 2011.
http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf
@@ -101,15 +134,20 @@
Default value is set to 1e-7.
- rescale-gradient rescaling factor of gradient.
- wd L2 regularization coefficient added to all the weights"
([{:keys [learning-rate rescale-gradient epsilon wd] :as opts
:or {learning-rate 0.05
rescale-gradient 1.0
epsilon 1e-7
wd 0.0}}]
(util/validate! ::ada-grad-opts opts "Incorrect ada-grad optimizer options")
(new AdaGrad (float learning-rate) (float rescale-gradient) (float epsilon) (float wd)))
([]
(ada-grad {})))

(s/def ::beta1 float?)
(s/def ::beta2 float?)
(s/def ::decay-factor float?)
(s/def ::adam-opts (s/keys :opt-un [::learning-rate ::beta1 ::beta2 ::epsilon ::decay-factor ::wd ::clip-gradient ::lr-scheduler]))

(defn adam
"Adam optimizer as described in [King2014]
@@ -125,19 +163,22 @@
- wd L2 regularization coefficient added to all the weights
- clip-gradient clip gradient in range [-clip_gradient, clip_gradient]
- lr-scheduler The learning rate scheduler"
([{:keys [learning-rate beta1 beta2 epsilon decay-factor wd clip-gradient lr-scheduler] :as opts
:or {learning-rate 0.002
beta1 0.9
beta2 0.999
epsilon 1e-8
decay-factor (- 1 1e-8)
wd 0
clip-gradient 0}}]
(util/validate! ::adam-opts opts "Incorrect adam optimizer options")
(new Adam (float learning-rate) (float beta1) (float beta2) (float epsilon)
(float decay-factor) (float wd) (float clip-gradient) lr-scheduler))
([]
(adam {})))
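
Because ::lr-scheduler only accepts org.apache.mxnet.FactorScheduler instances, a learning-rate schedule has to be passed as an actual scheduler object rather than a number. An illustrative sketch, assuming FactorScheduler's constructor takes a step interval and a decay factor as in the Scala API:

(import '(org.apache.mxnet FactorScheduler))

;; Assumed constructor arguments: step interval and decay factor.
(def scheduler (new FactorScheduler (int 1000) (float 0.9)))
(adam {:learning-rate 0.002 :lr-scheduler scheduler})
;; A bare number is rejected by the spec:
;; (adam {:lr-scheduler 0.1}) ; throws "Incorrect adam optimizer options"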

(s/def ::sgld-opts (s/keys :opt-un [::learning-rate ::rescale-gradient ::wd ::clip-gradient ::lr-scheduler]))

(defn sgld
"Stochastic Gradient Langevin Dynamics (SGLD) updater to sample from a distribution.
@@ -146,11 +187,12 @@
- wd L2 regularization coefficient added to all the weights
- clip-gradient Float, clip gradient in range [-clip_gradient, clip_gradient]
- lr-scheduler The learning rate scheduler"
([{:keys [learning-rate rescale-gradient wd clip-gradient lr-scheduler] :as opts
:or {learning-rate 0.01
rescale-gradient 1
wd 0.0001
clip-gradient 0}}]
(util/validate! ::sgld-opts opts "Incorrect sgld optimizer options")
(new SGLD (float learning-rate) (float rescale-gradient) (float wd)
(float clip-gradient) lr-scheduler))
([]
(sgld {})))
10 changes: 10 additions & 0 deletions (optimizer test namespace)
@@ -44,3 +44,13 @@
["sgld" optimizer/sgld]]]
(doseq [opt opts]
(test-optimizer opt))))

(deftest test-optimizers-parameters-specs
(is (thrown? Exception (optimizer/sgd {:wd 'a})))
(is (thrown? Exception (optimizer/dcasgd {:lambda 'a})))
(is (thrown? Exception (optimizer/nag {:momentum 'a})))
(is (thrown? Exception (optimizer/ada-delta {:epsilon 'a})))
(is (thrown? Exception (optimizer/rms-prop {:gamma1 'a})))
(is (thrown? Exception (optimizer/ada-grad {:rescale-gradient 'a})))
(is (thrown? Exception (optimizer/adam {:beta1 'a})))
(is (thrown? Exception (optimizer/sgld {:lr-scheduler 0.1}))))
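
With these specs in place, invalid options fail at construction time instead of surfacing later in training. An illustrative REPL session; the return values shown here are abbreviated:

(require '[org.apache.clojure-mxnet.optimizer :as optimizer])

(optimizer/sgd {:learning-rate 0.01 :momentum 0.9})
;; => an org.apache.mxnet.optimizer.SGD instance

(optimizer/sgd {:learning-rate "fast"})
;; => throws -- util/validate! is given the message "Incorrect sgd optimizer options"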
