diff --git a/doc/KNNRegressor.rst b/doc/KNNRegressor.rst index e34a82a..ac6b8f9 100644 --- a/doc/KNNRegressor.rst +++ b/doc/KNNRegressor.rst @@ -3,41 +3,42 @@ :sc-categories: Regression :sc-related: Classes/FluidKNNClassifier, Classes/FluidDataSet :see-also: -:description: - A nearest-neighbour regressor. A continuous value is predicted for each point as the (weighted) average value of its nearest neighbours. - - https://scikit-learn.org/stable/modules/neighbors.html#regression - +:description: Regression between DataSets using weighted average of neighbours +:discussion: + + KNNRegressor is a supervised machine learning algorithm for regression. In order to make predictions, the KNNRegressor must first be ``fit`` with an input :fluid-obj:`DataSet` of data points, each of which is paired (by means of a shared identifier) with another data point in an output DataSet. + It uses an internal ``KDTree`` to find an input point's ``numNeighbours`` nearest neighbours in an input dataset. The output returned is a weighted average of those neighbours' values from the output DataSet. + + The output DataSet must have only 1 dimension. :control numNeighbours: - number of neigbours to consider in mapping, min 1 + Number of neighbours to consider when interpolating the regressed value. The default is 3. :control weight: - Whether to weight neighbours by distance when producing new point - + Whether to weight neighbours by distance when producing new point. The default is 1 (true). :message fit: - :arg sourceDataSet: Source data + :arg sourceDataSet: input :fluid-obj:`DataSet` - :arg targetDataSet: Target data + :arg targetDataSet: output :fluid-obj:`DataSet` containing only one dimension. :arg action: Run when done - Map a source :fluid-obj:`DataSet` to a one-dimensional target; both DataSets need to have the same number of points. + Map an input :fluid-obj:`DataSet` to a one-dimensional output DataSet. 
:message predict: - :arg sourceDataSet: data to regress + :arg sourceDataSet: input :fluid-obj:`DataSet` - :arg targetDataSet: output data + :arg targetDataSet: a :fluid-obj:`DataSet` to write the predictions into :arg action: Run when done - Apply learned mapping to a :fluid-obj:`DataSet` and write to an output DataSet + Apply learned mapping to a :fluid-obj:`DataSet` and write predictions to an output DataSet :message predictPoint: @@ -45,4 +46,4 @@ :arg action: Run when done - Apply learned mapping to a data point in a |buffer| + Apply learned mapping to a data point in a |buffer|; the predicted value is returned. diff --git a/example-code/sc/KNNRegressor.scd b/example-code/sc/KNNRegressor.scd index 0b1e8e7..78b8e9a 100644 --- a/example-code/sc/KNNRegressor.scd +++ b/example-code/sc/KNNRegressor.scd @@ -1,64 +1,113 @@ code:: -//Make a simple mapping between a ramp and a sine cycle, test with an exponentional ramp +// Making an input dataset of a ramp from 0-1 and an output dataset of a sine wave +// we'll have the KNNRegressor learn the relationship between the inputs and outputs +// so that any input value provided will return where on the sine wave (what amplitude) +// the output should be ( -~source = FluidDataSet(s); -~target = FluidDataSet(s); -~test = FluidDataSet(s); -~output = FluidDataSet(s); -~tmpbuf = Buffer.alloc(s,1); -~regressor = FluidKNNRegressor(s); +~size = 128; +~ds_ramp = FluidDataSet(s).load( + Dictionary.newFrom([ + \cols,1, + \data,Dictionary.newFrom( + ~size.collect{arg i; + [i,i/~size]; // linear: 128 steps from 0 to (slightly less than) 1 + }.flat + ) + ]) +); + +~ds_sine = FluidDataSet(s).load( + Dictionary.newFrom([ + \cols,1, + \data,Dictionary.newFrom( + ~size.collect{ + arg i; + [i,sin(2pi*i/~size)]; // sine wave + }.flat; + ) + ]) +); ) -//Make source, target and test data +// fit to make the KNNRegressor learn the relationship between inputs and outputs ( -~sourcedata = 128.collect{|i|i/128}; -~targetdata = 128.collect{|i| 
sin(2*pi*i/128) }; -~testdata = 128.collect{|i|(i/128)**2}; - -~source.load( - Dictionary.with( - *[\cols -> 1,\data -> Dictionary.newFrom( - ~sourcedata.collect{|x, i| [i.asString, [x]]}.flatten)]) -); +~regressor = FluidKNNRegressor(s).fit(~ds_ramp,~ds_sine); +) -~target.load( -d = Dictionary.with( - *[\cols -> 1,\data -> Dictionary.newFrom( - ~targetdata.collect{|x, i| [i.asString, [x]]}.flatten)]); -); +// predicting with input dataset should give us what we expect: something that +// looks like a sine wave. +( +~predictions = FluidDataSet(s); +~regressor.predict(~ds_ramp,~predictions,{ + ~predictions.dump({ + arg dict; + var array = Array.newClear(~size); + dict["data"].keysValuesDo{ + arg id, v; + array[id.asInteger] = v[0]; + }; + {array.plot}.defer; + }); +}); +) -~test.load( - Dictionary.with( - *[\cols -> 1,\data -> Dictionary.newFrom( - ~testdata.collect{|x, i| [i.asString, [x]]}.flatten)]) +// now instead of using the linear ramp to derive the output, we'll use a warped ramp: an exponential curve +// make that dataset to use as input: +( +~ds_exponential = FluidDataSet(s).load( + Dictionary.newFrom([ + \cols,1, + \data,Dictionary.newFrom( + ~size.collect{arg i; + [i,(i/~size)**2]; + }.flat; + ) + ]) ); +) -~targetdata.plot; -~source.print; -~target.print; -~test.print; - +// use the regressor to make predictions based on that input: +( +~regressor.predict(~ds_exponential,~predictions,{ + ~predictions.dump({ + arg dict; + var array = Array.newClear(~size); + dict["data"].keysValuesDo{ + arg id, v; + array[id.asInteger] = v[0]; + }; + array.postln; + {array.plot}.defer; + }); }); ) -// Now make a regressor and fit it to the source and target, and predict against test -//grab the output data whilst we're at it, so we can inspect +// just for fun let's use the sine wave ds as input... 
+// notice that all the negative values of the sine wave +// (the second half) are going to have the same three nearest +// neighbours and therefore the same value for their prediction! ( -~outputdata = Array(128); -~regressor.fit(~source, ~target); -~regressor.predict(~test, ~output, 1, action:{ - ~output.dump{|x| 128.do{|i| - ~outputdata.add(x["data"][i.asString][0]) - }}; +~regressor.predict(~ds_sine,~predictions,{ + ~predictions.dump({ + arg dict; + var array = Array.newClear(~size); + dict["data"].keysValuesDo{ + arg id, v; + array[id.asInteger] = v[0]; + }; + array.postln; + {array.plot}.defer; + }); }); ) -//We should see a single cycle of a chirp -~outputdata.plot; +:: +strong::single point transform on arbitrary value:: +code:: +~inbuf = Buffer.loadCollection(s,[0.3]); -// single point transform on arbitrary value -~inbuf = Buffer.loadCollection(s,[0.5]); ~regressor.predictPoint(~inbuf,{|x|x.postln;}); :: @@ -70,11 +119,11 @@ code:: { var input = Saw.kr(2).linlin(-1,1,0,1); var trig = Impulse.kr(ControlRate.ir/10); - var inputPoint = LocalBuf(1); - var outputPoint = LocalBuf(1); + var inputPoint = LocalBuf(1); + var outputPoint = LocalBuf(1); BufWr.kr(input,inputPoint,0); - ~regressor.kr(trig,inputPoint,outputPoint); - BufRd.kr(1,outputPoint,0); + ~regressor.kr(trig,inputPoint,outputPoint); + BufRd.kr(1,outputPoint,0); }.scope )