diff --git a/docs/guide_advanced.rst b/docs/guide_advanced.rst
index 2fc4702..f4d228f 100644
--- a/docs/guide_advanced.rst
+++ b/docs/guide_advanced.rst
@@ -44,16 +44,15 @@ Here's how to setup such a pipeline with a multi-layer perceptron as a classifie
 
 You can then use the pipeline as you would the neural network, or any other standard API from scikit-learn.
 
-
 Grid Search
 -----------
 
-In scikit-learn, you can use a ``GridSearchCV`` to optimize your neural network's parameters automatically, both the top-level parameters and the parameters within the layers. For example, assuming you have your MLP constructed as in the :ref:`Regression` example in the local variable called ``nn``, the layers are named automatically so you can refer to them as follows:
+In scikit-learn, you can use a ``GridSearchCV`` to optimize your neural network's hyper-parameters automatically, both the top-level parameters and the parameters within the layers. For example, assuming you have your MLP constructed as in the :ref:`Regression` example in the local variable called ``nn``, the layers are named automatically so you can refer to them as follows:
 
 * ``hidden0``
 * ``hidden1``
 * ...
-* ``output2``
+* ``output``
 
 Keep in mind you can manually specify the ``name`` of any ``Layer`` in the constructor if you don't want the automatically assigned name. Then, you can use sklearn's hierarchical parameters to perform a grid search over those nested parameters too:
 
@@ -67,4 +66,34 @@ Keep in mind you can manually specify the ``name`` of any ``Layer`` in the const
         'hidden0__type': ["Rectifier", "Sigmoid", "Tanh"]})
     gs.fit(a_in, a_out)
 
-This will search through the listed ``learning_rate`` values, the number of hidden units and the activation type for that layer too, and find the best combination of parameters.
\ No newline at end of file
+This will search through the listed ``learning_rate`` values, the number of hidden units, and the activation type for that layer, and find the best combination of parameters.
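+
+Once the search has finished, the outcome is available on the search object itself. A minimal sketch, assuming ``gs`` is the fitted ``GridSearchCV`` from above; ``best_params_``, ``best_score_`` and ``best_estimator_`` are standard scikit-learn attributes, and ``nn_best`` is just an illustrative name:
+
+.. code:: python
+
+    print(gs.best_params_)    # the winning parameter combination
+    print(gs.best_score_)     # its mean cross-validation score
+
+    # With the default refit=True, this is a copy of ``nn`` re-trained on
+    # the whole dataset using the best parameters found by the search.
+    nn_best = gs.best_estimator_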
+
+
+Randomized Search
+-----------------
+
+When you have a large number of hyper-parameters and trying every combination exhaustively becomes too expensive, you can use a randomized search instead, which samples a fixed number of candidate combinations (controlled by its ``n_iter`` argument) from the distributions you specify:
+
+.. code:: python
+
+    from scipy import stats
+    from sklearn.grid_search import RandomizedSearchCV
+
+    rs = RandomizedSearchCV(nn, param_distributions={
+        'learning_rate': stats.uniform(0.001, 0.05),
+        'hidden0__units': stats.randint(4, 12),
+        'hidden0__type': ["Rectifier", "Sigmoid", "Tanh"]})
+    rs.fit(a_in, a_out)
+
+This works for both :class:`sknn.mlp.Classifier` and :class:`sknn.mlp.Regressor`.
\ No newline at end of file
diff --git a/sknn/mlp.py b/sknn/mlp.py
index ba167e0..bc8eebf 100644
--- a/sknn/mlp.py
+++ b/sknn/mlp.py
@@ -48,9 +48,15 @@ class Layer(object):
 
     name: str, optional
         You optionally can specify a name for this layer, and its parameters
-        will then be accessible to `scikit-learn` via a nested sub-object. For example,
-        if name is set to `hidden1`, then the parameter `hidden1__units` from the network
-        is bound to this layer's `units` variable.
+        will then be accessible to scikit-learn via a nested sub-object. For example,
+        if name is set to ``layer1``, then the parameter ``layer1__units`` from the network
+        is bound to this layer's ``units`` variable.
+
+        The name defaults to ``hiddenN``, where ``N`` is the integer index of that layer, and the
+        final layer is always ``output`` without an index.
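+
+        For example, a network built with ``layers=[Layer("Rectifier", units=20), Layer("Linear")]``
+        exposes ``hidden0__units``, ``hidden0__type`` and ``output__type`` as nested parameters.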
 
     units: int
         The number of units (also known as neurons) in this layer.  This applies to all
@@ -353,8 +356,7 @@ def __init__(
 
             # Layer names are optional, if not specified then generate one.
             if layer.name is None:
-                label = "hidden" if i < len(layers)-1 else "output"
-                layer.name = "%s%i" % (label, i)
+                layer.name = ("hidden%i" % i) if i < len(layers)-1 else "output"
 
             # sklearn may pass layers in as additional named parameters, remove them.
             if layer.name in params:
diff --git a/sknn/tests/test_layers.py b/sknn/tests/test_layers.py
index bf03430..d7f2aa7 100644
--- a/sknn/tests/test_layers.py
+++ b/sknn/tests/test_layers.py
@@ -10,7 +10,7 @@ class TestNestedParameters(unittest.TestCase):
     def test_GetParamsIncludesLayers(self):
         nn = MLPR(layers=[L("Linear", units=123)])
         p = nn.get_params()
-        assert_in('output0', p)
+        assert_in('output', p)
 
     def test_GetParamsMissingLayer(self):
         nn = MLPR(layers=[L("Linear", units=123)])
@@ -19,9 +19,9 @@
 
     def test_SetParamsDoubleUnderscore(self):
         nn = MLPR(layers=[L("Linear", units=123)])
-        nn.set_params(output0__units=456)
+        nn.set_params(output__units=456)
         assert_equal(nn.layers[0].units, 456)
 
     def test_SetParamsValueError(self):
         nn = MLPR(layers=[L("Linear")])
-        assert_raises(ValueError, nn.set_params, output0__range=1.0)
+        assert_raises(ValueError, nn.set_params, output__range=1.0)
diff --git a/sknn/tests/test_sklearn.py b/sknn/tests/test_sklearn.py
index fd25d77..c1314c4 100644
--- a/sknn/tests/test_sklearn.py
+++ b/sknn/tests/test_sklearn.py
@@ -2,32 +2,57 @@
 from nose.tools import (assert_equal, assert_raises, assert_in, assert_not_in)
 
 import numpy
-from sklearn.grid_search import GridSearchCV
+from scipy.stats import randint, uniform
+
+from sklearn.grid_search import GridSearchCV, RandomizedSearchCV
 from sklearn.cross_validation import cross_val_score
 
-from sknn.mlp import Regressor as MLPR
+from sknn.mlp import Regressor as MLPR, Classifier as MLPC
 from sknn.mlp import Layer as L
 
 
-class TestGridSearch(unittest.TestCase):
-
-    def test_RegressorGlobalParams(self):
-        a_in = numpy.random.uniform(0.0, 1.0, (64,16))
-        a_out = numpy.zeros((64,1))
+class TestGridSearchRegressor(unittest.TestCase):
+
+    __estimator__ = MLPR
+
+    def setUp(self):
+        self.a_in = numpy.random.uniform(0.0, 1.0, (64,16))
+        self.a_out = numpy.zeros((64,1))
 
+    def test_GridGlobalParams(self):
         clf = GridSearchCV(
-            MLPR(layers=[L("Linear")], n_iter=1),
+            self.__estimator__(layers=[L("Linear")], n_iter=1),
             param_grid={'learning_rate': [0.01, 0.001]})
-        clf.fit(a_in, a_out)
-
-    def test_RegressorLayerParams(self):
-        a_in = numpy.random.uniform(0.0, 1.0, (64,16))
-        a_out = numpy.zeros((64,1))
+        clf.fit(self.a_in, self.a_out)
 
+    def test_GridLayerParams(self):
         clf = GridSearchCV(
-            MLPR(layers=[L("Rectifier", units=12), L("Linear")], n_iter=1),
+            self.__estimator__(layers=[L("Rectifier", units=12), L("Linear")], n_iter=1),
             param_grid={'hidden0__units': [4, 8, 12]})
-        clf.fit(a_in, a_out)
+        clf.fit(self.a_in, self.a_out)
+
+    def test_RandomGlobalParams(self):
+        clf = RandomizedSearchCV(
+            self.__estimator__(layers=[L("Softmax")], n_iter=1),
+            param_distributions={'learning_rate': uniform(0.001, 0.01)},
+            n_iter=2)
+        clf.fit(self.a_in, self.a_out)
+
+    def test_RandomLayerParams(self):
+        clf = RandomizedSearchCV(
+            self.__estimator__(layers=[L("Softmax", units=12), L("Linear")], n_iter=1),
+            param_distributions={'hidden0__units': randint(4, 12)},
+            n_iter=2)
+        clf.fit(self.a_in, self.a_out)
+
+
+class TestGridSearchClassifier(TestGridSearchRegressor):
+
+    __estimator__ = MLPC
+
+    def setUp(self):
+        self.a_in = numpy.random.uniform(0.0, 1.0, (64,16))
+        self.a_out = numpy.random.randint(0, 4, (64,))
 
 
 class TestCrossValidation(unittest.TestCase):
@@ -37,3 +62,9 @@ def test_Regressor(self):
         a_out = numpy.zeros((64,1))
 
         cross_val_score(MLPR(layers=[L("Linear")], n_iter=1), a_in, a_out, cv=5)
+
+    def test_Classifier(self):
+        a_in = numpy.random.uniform(0.0, 1.0, (64,16))
+        a_out = numpy.random.randint(0, 4, (64,))
+
+        cross_val_score(MLPC(layers=[L("Linear")], n_iter=1), a_in, a_out, cv=5)