From 169752232f43756caab985d9dacf40ba981a498f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 28 Mar 2017 17:58:22 +0900 Subject: [PATCH 1/5] update MSE to be agnostic to dimensions. --- src/metric.jl | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/metric.jl b/src/metric.jl index eb4bff393..92e304f28 100644 --- a/src/metric.jl +++ b/src/metric.jl @@ -200,9 +200,10 @@ end """ MSE -Mean Squared Error. TODO: add support for multi-dimensional outputs. +Mean Squared Error. -Calculates the mean squared error regression loss in one dimension. +Calculates the mean squared error regression loss. +Requires that label and prediction have the same shape. """ type MSE <: AbstractEvalMetric @@ -213,14 +214,10 @@ type MSE <: AbstractEvalMetric end function _update_single_output(metric :: MSE, label :: NDArray, pred :: NDArray) - label = copy(label) - pred = copy(pred) - - n_sample = size(pred)[end] - metric.n_sample += n_sample - - for i = 1:n_sample - metric.mse_sum += (label[i] - pred[i])^2 + @assert size(label) == size(pred) + metric.n_sample += length(label) + @nd_as_jl ro=(label, pred) begin + metric.mse_sum += sumabs2(label .- pred) end end From e12052c65027abc1141cad86c11fa7423172eb18 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 30 Mar 2017 12:44:09 +0900 Subject: [PATCH 2/5] convert single outputs to array so that inference has it easier --- src/metric.jl | 204 ++++++++++++++++++++-------------------- test/unittest/metric.jl | 2 +- 2 files changed, 101 insertions(+), 105 deletions(-) diff --git a/src/metric.jl b/src/metric.jl index 92e304f28..271ab719e 100644 --- a/src/metric.jl +++ b/src/metric.jl @@ -27,7 +27,13 @@ function update!{T <: AbstractEvalMetric}(metric :: T, labels :: Vector{NDArray} number of outputs ($(length(preds))). The calculated metric might not be accuracte.") end for (label, pred) in zip(labels, preds) - _update_single_output(metric, label, pred) + @nd_as_jl ro=(label, pred) begin + # This is a dynamic dispatch since the conversion from NDArray to + # Array is not type-stable. We could use a trait to decide if we should + # convert the NDArray here so that the called function will be type-stable + # or if we should forward the NDArray. + _update_single_output(metric, label, pred) + end end end @@ -154,37 +160,35 @@ type Accuracy <: AbstractEvalMetric Accuracy() = new(0.0, 0) end -function _update_single_output(metric :: Accuracy, label :: NDArray, pred :: NDArray) - @nd_as_jl ro=(label,pred) begin - # Samples are stored in the last dimension - @assert size(label, ndims(label)) == size(pred, ndims(pred)) +function _update_single_output(metric :: Accuracy, label :: Array, pred :: Array) + # Samples are stored in the last dimension + @assert size(label, ndims(label)) == size(pred, ndims(pred)) - if ndims(pred) == 4 # Multidimensional case - # Reshape label to be of the same shape as pred. - # Except for the third dimension where the predictions are stored. - labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) + if ndims(pred) == 4 # Multidimensional case + # Reshape label to be of the same shape as pred. + # Except for the third dimension where the predictions are stored. 
+ labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) - for sample in 1:size(labels, 4) - for j in 1:size(labels, 2) - for i in 1:size(labels, 1) - label = labels[i, j, 1, sample] - klasses = view(pred, i, j, :, sample) - klass = indmax(klasses) - 1 # Classes start at 0...k-1 + for sample in 1:size(labels, 4) + for j in 1:size(labels, 2) + for i in 1:size(labels, 1) + label = labels[i, j, 1, sample] + klasses = view(pred, i, j, :, sample) + klass = indmax(klasses) - 1 # Classes start at 0...k-1 - metric.acc_sum += klass == label - metric.n_sample += 1 - end + metric.acc_sum += klass == label + metric.n_sample += 1 end end - elseif ndims(pred) == 2 # 1-dimensional case - for sample in 1:size(label, 1) - klass = indmax(view(pred, :, sample)) - 1 - metric.acc_sum += klass == label[sample] - metric.n_sample += 1 - end - else - error("Can't handle prediction with dimensions $(ndims(pred)).") end + elseif ndims(pred) == 2 # 1-dimensional case + for sample in 1:size(label, 1) + klass = indmax(view(pred, :, sample)) - 1 + metric.acc_sum += klass == label[sample] + metric.n_sample += 1 + end + else + error("Can't handle prediction with dimensions $(ndims(pred)).") end end @@ -213,12 +217,11 @@ type MSE <: AbstractEvalMetric MSE() = new(0.0, 0) end -function _update_single_output(metric :: MSE, label :: NDArray, pred :: NDArray) +function _update_single_output{T}(metric :: MSE, label :: Array{T}, pred :: Array{T}) @assert size(label) == size(pred) metric.n_sample += length(label) - @nd_as_jl ro=(label, pred) begin - metric.mse_sum += sumabs2(label .- pred) - end + metric.mse_sum += sumabs2(label .- pred) + return nothing end function get(metric :: MSE) @@ -284,10 +287,7 @@ type NMSE <: AbstractEvalMetric NMSE() = new(0.0, 0) end -function _update_single_output(metric :: NMSE, label :: NDArray, pred :: NDArray) - label = copy(label) - pred = copy(pred) - +function _update_single_output(metric :: NMSE, label :: Array, pred :: Array) n_sample = size(pred)[end] metric.n_sample += n_sample @@ -334,42 +334,40 @@ function reset!(metric :: ACE) metric.n_sample = 0 end -function _update_single_output(metric :: ACE, label :: NDArray, pred :: NDArray) - @nd_as_jl ro=(label,pred) begin - eps = metric.eps - # Samples are stored in the last dimension - @assert size(label, ndims(label)) == size(pred, ndims(pred)) - if size(label) == size(pred) # simply calculate the cross entropy of the probabilities - for (q, p) in zip(pred, label) - # p == true probability - # q == "unnatural" probability - metric.ace_sum += p * log(q + eps) +function _update_single_output(metric :: ACE, label :: Array, pred :: Array) + eps = metric.eps + # Samples are stored in the last dimension + @assert size(label, ndims(label)) == size(pred, ndims(pred)) + if size(label) == size(pred) # simply calculate the cross entropy of the probabilities + for (q, p) in zip(pred, label) + # p == true probability + # q == "unnatural" probability + metric.ace_sum += p * log(q + eps) + metric.n_sample += 1 + end + elseif ndims(pred) == 4 + labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) + for sample in 1:size(labels, 4) + for j in 1:size(labels, 2) + for i in 1:size(labels, 1) + # Cross-entropy reduces to -(ln(p_1)*0 + ln(p_2)*1) for classification + # Since we can only target labels right now this is the only thing we can do. 
+ target = Int(labels[i, j, 1, sample]) + 1 # klasses are 0...k-1 => julia indexing + p_k = pred[i, j, target, sample] + metric.ace_sum += log(p_k + eps) metric.n_sample += 1 - end - elseif ndims(pred) == 4 - labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) - for sample in 1:size(labels, 4) - for j in 1:size(labels, 2) - for i in 1:size(labels, 1) - # Cross-entropy reduces to -(ln(p_1)*0 + ln(p_2)*1) for classification - # Since we can only target labels right now this is the only thing we can do. - target = Int(labels[i, j, 1, sample]) + 1 # klasses are 0...k-1 => julia indexing - p_k = pred[i, j, target, sample] - metric.ace_sum += log(p_k + eps) - metric.n_sample += 1 - end end end - elseif ndims(pred) == 2 # 1-dimensional case - for sample in 1:size(label, 1) - target = Int(label[sample]) + 1 # 0-based indexing => 1-based indexing - p_k = pred[target, sample] - metric.ace_sum += log(p_k + eps) - metric.n_sample += 1 - end - else - error("Can't handle prediction with dimensions $(ndims(pred)).") end + elseif ndims(pred) == 2 # 1-dimensional case + for sample in 1:size(label, 1) + target = Int(label[sample]) + 1 # 0-based indexing => 1-based indexing + p_k = pred[target, sample] + metric.ace_sum += log(p_k +eps) + metric.n_sample += 1 + end + else + error("Can't handle prediction with dimensions $(ndims(pred)).") end end @@ -398,48 +396,46 @@ function reset!(metric :: MultiACE) metric.counts = Base.zero(metric.counts) end -function _update_single_output(metric :: MultiACE, label :: NDArray, pred :: NDArray) - @nd_as_jl ro=(label,pred) begin - eps = metric.eps - # Samples are stored in the last dimension - @assert size(label, ndims(label)) == size(pred, ndims(pred)) - @assert size(metric.aces) == size(metric.counts) - if size(label) == size(pred) # simply calculate the cross entropy of the probabilities - for k in 1:length(metric.aces) - kpred = view(pred, ntuple(d->:, ndims(pred) - 2)..., k, :) - klabel = view(label, ntuple(d->:, ndims(label) - 2)..., k, :) - for (q, p) in zip(kpred, klabel) - # p == true probability - # q == "unnatural" probability - metric.aces[k] += p * log(q + eps) - metric.counts[k] += 1 - end +function _update_single_output(metric :: MultiACE, label :: Array, pred :: Array) + eps = metric.eps + # Samples are stored in the last dimension + @assert size(label, ndims(label)) == size(pred, ndims(pred)) + @assert size(metric.aces) == size(metric.counts) + if size(label) == size(pred) # simply calculate the cross entropy of the probabilities + for k in 1:length(metric.aces) + kpred = view(pred, ntuple(d->:, ndims(pred) - 2)..., k, :) + klabel = view(label, ntuple(d->:, ndims(label) - 2)..., k, :) + for (q, p) in zip(kpred, klabel) + # p == true probability + # q == "unnatural" probability + metric.aces[k] += p * log(q + eps) + metric.counts[k] += 1 end - elseif ndims(pred) == 4 - labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) - for sample in 1:size(labels, 4) - for j in 1:size(labels, 2) - for i in 1:size(labels, 1) - # Cross-entropy reduces to -(ln(p_1)*0 + ln(p_2)*1) for classification - # Since we can only target labels right now this is the only thing we can do. 
- target = Int(labels[i, j, 1, sample]) + 1 # klasses are 0...k-1 => julia indexing - p_k = pred[i, j, target, sample] - - metric.aces[target] += log(p_k + eps) - metric.counts[target] += 1 - end + end + elseif ndims(pred) == 4 + labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) + for sample in 1:size(labels, 4) + for j in 1:size(labels, 2) + for i in 1:size(labels, 1) + # Cross-entropy reduces to -(ln(p_1)*0 + ln(p_2)*1) for classification + # Since we can only target labels right now this is the only thing we can do. + target = Int(labels[i, j, 1, sample]) + 1 # klasses are 0...k-1 => julia indexing + p_k = pred[i, j, target, sample] + + metric.aces[target] += log(p_k + eps) + metric.counts[target] += 1 end end - elseif ndims(pred) == 2 - for sample in 1:size(label, 1) - target = Int(label[sample]) + 1 - p_k = pred[target, sample] - metric.aces[target] += log(p_k + eps) - metric.counts[target] += 1 - end - else - error("Can't handle prediction with dimensions $(ndims(pred)).") end + elseif ndims(pred) == 2 + for sample in 1:size(label, 1) + target = Int(label[sample]) + 1 + p_k = pred[target, sample] + metric.aces[target] += log(p_k + eps) + metric.counts[target] += 1 + end + else + error("Can't handle prediction with dimensions $(ndims(pred)).") end end diff --git a/test/unittest/metric.jl b/test/unittest/metric.jl index 9628f6ca2..1377c4c2b 100644 --- a/test/unittest/metric.jl +++ b/test/unittest/metric.jl @@ -48,7 +48,7 @@ function test_ace() probs = convert(Array{Float32}, generate_probs(n_categories, n_observations)) LL = loglikelihood(labels, probs) metric = mx.ACE() # For categorical variables, ACE == -LL - mx._update_single_output(metric, mx.NDArray(labels), mx.NDArray(probs)) + mx._update_single_output(metric, labels, probs) LL_v2 = metric.ace_sum / metric.n_sample @static if VERSION >= v"0.6.0-dev.2075" @test LL ≈ LL_v2 atol=1e-12 From def129f20a03bec6913217c6b9e685941dad66e2 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 14 Apr 2017 11:45:16 +0900 Subject: [PATCH 3/5] convert eps to eltype of array --- src/metric.jl | 8 ++++---- test/unittest/metric.jl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/metric.jl b/src/metric.jl index 271ab719e..1eed64988 100644 --- a/src/metric.jl +++ b/src/metric.jl @@ -334,8 +334,8 @@ function reset!(metric :: ACE) metric.n_sample = 0 end -function _update_single_output(metric :: ACE, label :: Array, pred :: Array) - eps = metric.eps +function _update_single_output{T}(metric :: ACE, label :: Array{T}, pred :: Array{T}) + eps = convert(T, metric.eps) # Samples are stored in the last dimension @assert size(label, ndims(label)) == size(pred, ndims(pred)) if size(label) == size(pred) # simply calculate the cross entropy of the probabilities @@ -396,8 +396,8 @@ function reset!(metric :: MultiACE) metric.counts = Base.zero(metric.counts) end -function _update_single_output(metric :: MultiACE, label :: Array, pred :: Array) - eps = metric.eps +function _update_single_output{T}(metric :: MultiACE, label :: Array{T}, pred :: Array{T}) + eps = convert(T, metric.eps) # Samples are stored in the last dimension @assert size(label, ndims(label)) == size(pred, ndims(pred)) @assert size(metric.aces) == size(metric.counts) diff --git a/test/unittest/metric.jl b/test/unittest/metric.jl index 1377c4c2b..5b5632a87 100644 --- a/test/unittest/metric.jl +++ b/test/unittest/metric.jl @@ -28,7 +28,7 @@ end function loglikelihood{T <: AbstractFloat}(labels::Vector{T}, probs::Array{T, 2}) LL = 0.0 - eps = 1.0e-8 + eps = 
convert(T, 1.0e-8)
     for i = 1:size(labels, 1)
         LL += log(probs[Int(labels[i]) + 1, i] + eps) # labels are zero-based
     end

From c2044d5bd7dad88721f7edd6c06f0f066a91076a Mon Sep 17 00:00:00 2001
From: Valentin Churavy
Date: Fri, 14 Apr 2017 16:01:01 +0900
Subject: [PATCH 4/5] adds THTT (Tim Holy Trait Trick) to enable metrics to support NDArray or Array

---
 src/metric.jl | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/src/metric.jl b/src/metric.jl
index 1eed64988..5c5945e96 100644
--- a/src/metric.jl
+++ b/src/metric.jl
@@ -10,6 +10,16 @@ interfaces:
 """
 abstract AbstractEvalMetric
 
+"""
+    hasNDArraySupport(metric) -> Val{true/false}
+
+Trait for `_update_single_output`: should return `Val{true}()` if the metric can handle `NDArray`
+directly and `Val{false}()` if it requires `Array`. Metrics that work with `NDArray` can be
+asynchronous, while native Julia arrays require that we copy the output of the network, which is
+a blocking operation.
+"""
+hasNDArraySupport(::AbstractEvalMetric) = Val{true}()
+
 """
     update!(metric, labels, preds)
 
@@ -21,6 +31,21 @@ Update and accumulate metrics.
 * `preds::Vector{NDArray}`: the outputs (predictions) of the network.
 """
 function update!{T <: AbstractEvalMetric}(metric :: T, labels :: Vector{NDArray}, preds :: Vector{NDArray})
+    _update!(metric, labels, preds, hasNDArraySupport(metric))
+end
+
+function _update!{T<: AbstractEvalMetric}(metric :: T, labels :: Vector{NDArray}, preds :: Vector{NDArray}, :: Val{true})
+    if length(labels) != length(preds)
+        Base.warn_once(
+            "The number of labels ($(length(labels))) does not correspond to the\
+            number of outputs ($(length(preds))). The calculated metric might not be accurate.")
+    end
+    for (label, pred) in zip(labels, preds)
+        _update_single_output(metric, label, pred)
+    end
+end
+
+function _update!{T<: AbstractEvalMetric}(metric :: T, labels :: Vector{NDArray}, preds :: Vector{NDArray}, :: Val{false})
     if length(labels) != length(preds)
         Base.warn_once(
             "The number of labels ($(length(labels))) does not correspond to the\
@@ -29,9 +54,7 @@ function update!{T <: AbstractEvalMetric}(metric :: T, labels :: Vector{NDArray}
     for (label, pred) in zip(labels, preds)
         @nd_as_jl ro=(label, pred) begin
             # This is a dynamic dispatch since the conversion from NDArray to
-            # Array is not type-stable. We could use a trait to decide if we should
-            # convert the NDArray here so that the called function will be type-stable
-            # or if we should forward the NDArray.
+            # Array is not type-stable.
_update_single_output(metric, label, pred) end end @@ -160,6 +183,8 @@ type Accuracy <: AbstractEvalMetric Accuracy() = new(0.0, 0) end +hasNDArraySupport(::Accuracy) = Val{false}() + function _update_single_output(metric :: Accuracy, label :: Array, pred :: Array) # Samples are stored in the last dimension @assert size(label, ndims(label)) == size(pred, ndims(pred)) @@ -217,6 +242,8 @@ type MSE <: AbstractEvalMetric MSE() = new(0.0, 0) end +hasNDArraySupport(::MSE) = Val{false}() + function _update_single_output{T}(metric :: MSE, label :: Array{T}, pred :: Array{T}) @assert size(label) == size(pred) metric.n_sample += length(label) @@ -287,6 +314,8 @@ type NMSE <: AbstractEvalMetric NMSE() = new(0.0, 0) end +hasNDArraySupport(::NMSE) = Val{false}() + function _update_single_output(metric :: NMSE, label :: Array, pred :: Array) n_sample = size(pred)[end] metric.n_sample += n_sample @@ -334,6 +363,8 @@ function reset!(metric :: ACE) metric.n_sample = 0 end +hasNDArraySupport(::ACE) = Val{false}() + function _update_single_output{T}(metric :: ACE, label :: Array{T}, pred :: Array{T}) eps = convert(T, metric.eps) # Samples are stored in the last dimension @@ -396,6 +427,8 @@ function reset!(metric :: MultiACE) metric.counts = Base.zero(metric.counts) end +hasNDArraySupport(::MultiACE) = Val{false}() + function _update_single_output{T}(metric :: MultiACE, label :: Array{T}, pred :: Array{T}) eps = convert(T, metric.eps) # Samples are stored in the last dimension From 349d2a781a7acc4a408e1efca915a0dc99ad874e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 14 Apr 2017 16:09:07 +0900 Subject: [PATCH 5/5] convert MSE to use NDArray as an example for async --- src/metric.jl | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/metric.jl b/src/metric.jl index 5c5945e96..3bff815d4 100644 --- a/src/metric.jl +++ b/src/metric.jl @@ -236,27 +236,30 @@ Requires that label and prediction have the same shape. """ type MSE <: AbstractEvalMetric - mse_sum :: Float64 + mse_sum :: Vector{NDArray} n_sample :: Int - MSE() = new(0.0, 0) + MSE() = new(Vector{NDArray}(), 0) end -hasNDArraySupport(::MSE) = Val{false}() +hasNDArraySupport(::MSE) = Val{true}() -function _update_single_output{T}(metric :: MSE, label :: Array{T}, pred :: Array{T}) +function _update_single_output(metric :: MSE, label :: NDArray, pred :: NDArray) @assert size(label) == size(pred) metric.n_sample += length(label) - metric.mse_sum += sumabs2(label .- pred) + mse_sum = mx.sum(mx._PowerScalar(label - pred,scalar=2)) + push!(metric.mse_sum, mse_sum) return nothing end function get(metric :: MSE) - return [(:MSE, metric.mse_sum / metric.n_sample)] + # Delay copy until last possible moment + mse_sum = mapreduce(nda->copy(nda)[1], +, 0.0, metric.mse_sum) + return [(:MSE, mse_sum / metric.n_sample)] end function reset!(metric :: MSE) - metric.mse_sum = 0.0 + metric.mse_sum = Vector{NDArray}() metric.n_sample = 0 end
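
Note on PATCH 4/5: the dispatch pattern introduced there (a trait function returning
`Val{true}()` or `Val{false}()`, plus an internal `_update!` that dispatches on that value)
is the Tim Holy Trait Trick. A minimal, self-contained sketch of the same idea outside of
MXNet.jl follows; the names `Thing`, `Fast`, `Slow`, `isfast`, and `process` are invented
for illustration only, and the syntax follows the Julia 0.5/0.6 style used in this series.

    abstract Thing                   # stands in for AbstractEvalMetric

    type Fast <: Thing end           # uses the default trait value
    type Slow <: Thing end           # opts out, like Accuracy, ACE, etc. above

    isfast(::Thing) = Val{true}()    # default, mirrors hasNDArraySupport(::AbstractEvalMetric)
    isfast(::Slow)  = Val{false}()   # per-type opt-out, mirrors hasNDArraySupport(::Accuracy)

    # The public entry point looks up the trait once ...
    process(t::Thing) = _process(t, isfast(t))

    # ... and dispatch on Val{} picks the branch, so each method body stays type-stable.
    _process(t, ::Val{true})  = "fast path"   # e.g. keep the NDArray, stay asynchronous
    _process(t, ::Val{false}) = "slow path"   # e.g. copy to a Julia Array first (blocking)

    @assert process(Fast()) == "fast path"
    @assert process(Slow()) == "slow path"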
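For completeness, here is a sketch of how a new metric might be written against the API from
this series, as it could appear alongside the other metrics in src/metric.jl. The `MAE` type
and its fields are hypothetical and not part of these patches.

    type MAE <: AbstractEvalMetric
      abs_sum  :: Float64
      n_sample :: Int

      MAE() = new(0.0, 0)
    end

    # Opt out of the NDArray path: update! copies label/pred to Arrays before
    # calling _update_single_output (blocking, but the call is type-stable).
    hasNDArraySupport(::MAE) = Val{false}()

    function _update_single_output{T}(metric :: MAE, label :: Array{T}, pred :: Array{T})
        @assert size(label) == size(pred)
        metric.n_sample += length(label)
        metric.abs_sum  += sumabs(label .- pred)   # analogous to the sumabs2 call used by MSE
        return nothing
    end

    function get(metric :: MAE)
        return [(:MAE, metric.abs_sum / metric.n_sample)]
    end

    function reset!(metric :: MAE)
        metric.abs_sum  = 0.0
        metric.n_sample = 0
    end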