diff --git a/src/metric.jl b/src/metric.jl index eb4bff393..3bff815d4 100644 --- a/src/metric.jl +++ b/src/metric.jl @@ -10,6 +10,16 @@ interfaces: """ abstract AbstractEvalMetric +""" + hasNDArraySupport(metric) -> Val{true/false} + +Trait for `_update_single_output` should return `Val{true}() if metric can handle `NDArray` +directly and `Val{false}()i` if requires `Array`. Metric that work with NDArrays can be +async, while native Julia arrays require that we copy the output of the network, which is +a blocking operation. +""" +hasNDArraySupport(::AbstractEvalMetric) = Val{true}() + """ update!(metric, labels, preds) @@ -21,6 +31,10 @@ Update and accumulate metrics. * `preds::Vector{NDArray}`: the outputs (predictions) of the network. """ function update!{T <: AbstractEvalMetric}(metric :: T, labels :: Vector{NDArray}, preds :: Vector{NDArray}) + _update!(metric, labels, preds, hasNDArraySupport(metric)) +end + +function _update!{T<: AbstractEvalMetric}(metric :: T, labels :: Vector{NDArray}, preds :: Vector{NDArray}, :: Val{true}) if length(labels) != length(preds) Base.warn_once( "The number of labels ($(length(labels))) does not correspond to the\ @@ -31,6 +45,21 @@ function update!{T <: AbstractEvalMetric}(metric :: T, labels :: Vector{NDArray} end end +function _update!{T<: AbstractEvalMetric}(metric :: T, labels :: Vector{NDArray}, preds :: Vector{NDArray}, :: Val{false}) + if length(labels) != length(preds) + Base.warn_once( + "The number of labels ($(length(labels))) does not correspond to the\ + number of outputs ($(length(preds))). The calculated metric might not be accuracte.") + end + for (label, pred) in zip(labels, preds) + @nd_as_jl ro=(label, pred) begin + # This is a dynamic dispatch since the conversion from NDArray to + # Array is not type-stable. + _update_single_output(metric, label, pred) + end + end +end + """ reset!(metric) @@ -154,37 +183,37 @@ type Accuracy <: AbstractEvalMetric Accuracy() = new(0.0, 0) end -function _update_single_output(metric :: Accuracy, label :: NDArray, pred :: NDArray) - @nd_as_jl ro=(label,pred) begin - # Samples are stored in the last dimension - @assert size(label, ndims(label)) == size(pred, ndims(pred)) +hasNDArraySupport(::Accuracy) = Val{false}() + +function _update_single_output(metric :: Accuracy, label :: Array, pred :: Array) + # Samples are stored in the last dimension + @assert size(label, ndims(label)) == size(pred, ndims(pred)) - if ndims(pred) == 4 # Multidimensional case - # Reshape label to be of the same shape as pred. - # Except for the third dimension where the predictions are stored. - labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) + if ndims(pred) == 4 # Multidimensional case + # Reshape label to be of the same shape as pred. + # Except for the third dimension where the predictions are stored. + labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) - for sample in 1:size(labels, 4) - for j in 1:size(labels, 2) - for i in 1:size(labels, 1) - label = labels[i, j, 1, sample] - klasses = view(pred, i, j, :, sample) - klass = indmax(klasses) - 1 # Classes start at 0...k-1 + for sample in 1:size(labels, 4) + for j in 1:size(labels, 2) + for i in 1:size(labels, 1) + label = labels[i, j, 1, sample] + klasses = view(pred, i, j, :, sample) + klass = indmax(klasses) - 1 # Classes start at 0...k-1 - metric.acc_sum += klass == label - metric.n_sample += 1 - end + metric.acc_sum += klass == label + metric.n_sample += 1 end end - elseif ndims(pred) == 2 # 1-dimensional case - for sample in 1:size(label, 1) - klass = indmax(view(pred, :, sample)) - 1 - metric.acc_sum += klass == label[sample] - metric.n_sample += 1 - end - else - error("Can't handle prediction with dimensions $(ndims(pred)).") end + elseif ndims(pred) == 2 # 1-dimensional case + for sample in 1:size(label, 1) + klass = indmax(view(pred, :, sample)) - 1 + metric.acc_sum += klass == label[sample] + metric.n_sample += 1 + end + else + error("Can't handle prediction with dimensions $(ndims(pred)).") end end @@ -200,36 +229,37 @@ end """ MSE -Mean Squared Error. TODO: add support for multi-dimensional outputs. +Mean Squared Error. -Calculates the mean squared error regression loss in one dimension. +Calculates the mean squared error regression loss. +Requires that label and prediction have the same shape. """ type MSE <: AbstractEvalMetric - mse_sum :: Float64 + mse_sum :: Vector{NDArray} n_sample :: Int - MSE() = new(0.0, 0) + MSE() = new(Vector{NDArray}(), 0) end -function _update_single_output(metric :: MSE, label :: NDArray, pred :: NDArray) - label = copy(label) - pred = copy(pred) - - n_sample = size(pred)[end] - metric.n_sample += n_sample +hasNDArraySupport(::MSE) = Val{true}() - for i = 1:n_sample - metric.mse_sum += (label[i] - pred[i])^2 - end +function _update_single_output(metric :: MSE, label :: NDArray, pred :: NDArray) + @assert size(label) == size(pred) + metric.n_sample += length(label) + mse_sum = mx.sum(mx._PowerScalar(label - pred,scalar=2)) + push!(metric.mse_sum, mse_sum) + return nothing end function get(metric :: MSE) - return [(:MSE, metric.mse_sum / metric.n_sample)] + # Delay copy until last possible moment + mse_sum = mapreduce(nda->copy(nda)[1], +, 0.0, metric.mse_sum) + return [(:MSE, mse_sum / metric.n_sample)] end function reset!(metric :: MSE) - metric.mse_sum = 0.0 + metric.mse_sum = Vector{NDArray}() metric.n_sample = 0 end @@ -287,10 +317,9 @@ type NMSE <: AbstractEvalMetric NMSE() = new(0.0, 0) end -function _update_single_output(metric :: NMSE, label :: NDArray, pred :: NDArray) - label = copy(label) - pred = copy(pred) +hasNDArraySupport(::NMSE) = Val{false}() +function _update_single_output(metric :: NMSE, label :: Array, pred :: Array) n_sample = size(pred)[end] metric.n_sample += n_sample @@ -337,42 +366,42 @@ function reset!(metric :: ACE) metric.n_sample = 0 end -function _update_single_output(metric :: ACE, label :: NDArray, pred :: NDArray) - @nd_as_jl ro=(label,pred) begin - eps = metric.eps - # Samples are stored in the last dimension - @assert size(label, ndims(label)) == size(pred, ndims(pred)) - if size(label) == size(pred) # simply calculate the cross entropy of the probabilities - for (q, p) in zip(pred, label) - # p == true probability - # q == "unnatural" probability - metric.ace_sum += p * log(q + eps) +hasNDArraySupport(::ACE) = Val{false}() + +function _update_single_output{T}(metric :: ACE, label :: Array{T}, pred :: Array{T}) + eps = convert(T, metric.eps) + # Samples are stored in the last dimension + @assert size(label, ndims(label)) == size(pred, ndims(pred)) + if size(label) == size(pred) # simply calculate the cross entropy of the probabilities + for (q, p) in zip(pred, label) + # p == true probability + # q == "unnatural" probability + metric.ace_sum += p * log(q + eps) + metric.n_sample += 1 + end + elseif ndims(pred) == 4 + labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) + for sample in 1:size(labels, 4) + for j in 1:size(labels, 2) + for i in 1:size(labels, 1) + # Cross-entropy reduces to -(ln(p_1)*0 + ln(p_2)*1) for classification + # Since we can only target labels right now this is the only thing we can do. + target = Int(labels[i, j, 1, sample]) + 1 # klasses are 0...k-1 => julia indexing + p_k = pred[i, j, target, sample] + metric.ace_sum += log(p_k + eps) metric.n_sample += 1 - end - elseif ndims(pred) == 4 - labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) - for sample in 1:size(labels, 4) - for j in 1:size(labels, 2) - for i in 1:size(labels, 1) - # Cross-entropy reduces to -(ln(p_1)*0 + ln(p_2)*1) for classification - # Since we can only target labels right now this is the only thing we can do. - target = Int(labels[i, j, 1, sample]) + 1 # klasses are 0...k-1 => julia indexing - p_k = pred[i, j, target, sample] - metric.ace_sum += log(p_k + eps) - metric.n_sample += 1 - end end end - elseif ndims(pred) == 2 # 1-dimensional case - for sample in 1:size(label, 1) - target = Int(label[sample]) + 1 # 0-based indexing => 1-based indexing - p_k = pred[target, sample] - metric.ace_sum += log(p_k + eps) - metric.n_sample += 1 - end - else - error("Can't handle prediction with dimensions $(ndims(pred)).") end + elseif ndims(pred) == 2 # 1-dimensional case + for sample in 1:size(label, 1) + target = Int(label[sample]) + 1 # 0-based indexing => 1-based indexing + p_k = pred[target, sample] + metric.ace_sum += log(p_k +eps) + metric.n_sample += 1 + end + else + error("Can't handle prediction with dimensions $(ndims(pred)).") end end @@ -401,48 +430,48 @@ function reset!(metric :: MultiACE) metric.counts = Base.zero(metric.counts) end -function _update_single_output(metric :: MultiACE, label :: NDArray, pred :: NDArray) - @nd_as_jl ro=(label,pred) begin - eps = metric.eps - # Samples are stored in the last dimension - @assert size(label, ndims(label)) == size(pred, ndims(pred)) - @assert size(metric.aces) == size(metric.counts) - if size(label) == size(pred) # simply calculate the cross entropy of the probabilities - for k in 1:length(metric.aces) - kpred = view(pred, ntuple(d->:, ndims(pred) - 2)..., k, :) - klabel = view(label, ntuple(d->:, ndims(label) - 2)..., k, :) - for (q, p) in zip(kpred, klabel) - # p == true probability - # q == "unnatural" probability - metric.aces[k] += p * log(q + eps) - metric.counts[k] += 1 - end +hasNDArraySupport(::MultiACE) = Val{false}() + +function _update_single_output{T}(metric :: MultiACE, label :: Array{T}, pred :: Array{T}) + eps = convert(T, metric.eps) + # Samples are stored in the last dimension + @assert size(label, ndims(label)) == size(pred, ndims(pred)) + @assert size(metric.aces) == size(metric.counts) + if size(label) == size(pred) # simply calculate the cross entropy of the probabilities + for k in 1:length(metric.aces) + kpred = view(pred, ntuple(d->:, ndims(pred) - 2)..., k, :) + klabel = view(label, ntuple(d->:, ndims(label) - 2)..., k, :) + for (q, p) in zip(kpred, klabel) + # p == true probability + # q == "unnatural" probability + metric.aces[k] += p * log(q + eps) + metric.counts[k] += 1 end - elseif ndims(pred) == 4 - labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) - for sample in 1:size(labels, 4) - for j in 1:size(labels, 2) - for i in 1:size(labels, 1) - # Cross-entropy reduces to -(ln(p_1)*0 + ln(p_2)*1) for classification - # Since we can only target labels right now this is the only thing we can do. - target = Int(labels[i, j, 1, sample]) + 1 # klasses are 0...k-1 => julia indexing - p_k = pred[i, j, target, sample] - - metric.aces[target] += log(p_k + eps) - metric.counts[target] += 1 - end + end + elseif ndims(pred) == 4 + labels = reshape(label, size(pred, 1, 2)..., 1, size(pred, 4)) + for sample in 1:size(labels, 4) + for j in 1:size(labels, 2) + for i in 1:size(labels, 1) + # Cross-entropy reduces to -(ln(p_1)*0 + ln(p_2)*1) for classification + # Since we can only target labels right now this is the only thing we can do. + target = Int(labels[i, j, 1, sample]) + 1 # klasses are 0...k-1 => julia indexing + p_k = pred[i, j, target, sample] + + metric.aces[target] += log(p_k + eps) + metric.counts[target] += 1 end end - elseif ndims(pred) == 2 - for sample in 1:size(label, 1) - target = Int(label[sample]) + 1 - p_k = pred[target, sample] - metric.aces[target] += log(p_k + eps) - metric.counts[target] += 1 - end - else - error("Can't handle prediction with dimensions $(ndims(pred)).") end + elseif ndims(pred) == 2 + for sample in 1:size(label, 1) + target = Int(label[sample]) + 1 + p_k = pred[target, sample] + metric.aces[target] += log(p_k + eps) + metric.counts[target] += 1 + end + else + error("Can't handle prediction with dimensions $(ndims(pred)).") end end diff --git a/test/unittest/metric.jl b/test/unittest/metric.jl index 9628f6ca2..5b5632a87 100644 --- a/test/unittest/metric.jl +++ b/test/unittest/metric.jl @@ -28,7 +28,7 @@ end function loglikelihood{T <: AbstractFloat}(labels::Vector{T}, probs::Array{T, 2}) LL = 0.0 - eps = 1.0e-8 + eps = convert(T, 1.0e-8) for i = 1:size(labels, 1) LL += log(probs[Int(labels[i]) + 1, i] + eps) # labels are zero-based end @@ -48,7 +48,7 @@ function test_ace() probs = convert(Array{Float32}, generate_probs(n_categories, n_observations)) LL = loglikelihood(labels, probs) metric = mx.ACE() # For categorical variables, ACE == -LL - mx._update_single_output(metric, mx.NDArray(labels), mx.NDArray(probs)) + mx._update_single_output(metric, labels, probs) LL_v2 = metric.ace_sum / metric.n_sample @static if VERSION >= v"0.6.0-dev.2075" @test LL ≈ LL_v2 atol=1e-12