From 8011b77cc6291dcac90fd78be8236c836e1bc2eb Mon Sep 17 00:00:00 2001 From: Gabriele Bozzola Date: Wed, 8 May 2024 11:18:31 -0700 Subject: [PATCH] Change units of pr bias plots to mm/day --- .../user_io/leaderboard/compare_with_obs.jl | 17 +++++++------ .../user_io/leaderboard/data_sources.jl | 24 +++++++++++++++---- .../ClimaEarth/user_io/leaderboard/utils.jl | 2 +- test/experiment_tests/leaderboard.jl | 10 ++++---- 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl b/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl index 066111a07f..2cfca1aadb 100644 --- a/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl +++ b/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl @@ -1,16 +1,15 @@ const OBS_DS = Dict() +const SIM_DS_KWARGS = Dict() function preprocess_pr_fn(data) - # 1 mm/day -> - 1 kg/m/s2 - # The minus sign comes from the different conventions used - return data .* Float32(-1 / 86400) + # -1 kg/m/s2 -> 1 mm/day + return data .* Float32(-86400) end -OBS_DS["pr"] = ObsDataSource(; - path = joinpath(pr_obs_data_path(), "gpcp.precip.mon.mean.197901-202305.nc"), - var_name = "precip", - preprocess_data_fn = preprocess_pr_fn, -) +OBS_DS["pr"] = + ObsDataSource(; path = joinpath(pr_obs_data_path(), "gpcp.precip.mon.mean.197901-202305.nc"), var_name = "precip") + +SIM_DS_KWARGS["pr"] = (; preprocess_data_fn = preprocess_pr_fn, new_units = "mm / day") # OBS_DS["rsut"] = ObsDataSource(; # path = "OBS/CERES_EBAF-TOA_Ed4.2_Subset_200003-202303.g025.nc", @@ -24,7 +23,7 @@ OBS_DS["pr"] = ObsDataSource(; function bias(output_dir::AbstractString, short_name::AbstractString, target_dates::AbstractArray{<:Dates.DateTime}) obs = OBS_DS[short_name] - sim = SimDataSource(; path = output_dir, short_name) + sim = SimDataSource(; path = output_dir, short_name, SIM_DS_KWARGS["pr"]...) return bias(obs, sim, target_dates) end diff --git a/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl b/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl index 83f27010d6..d4593d10a1 100644 --- a/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl +++ b/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl @@ -19,8 +19,7 @@ struct ObsDataSource """Name of the latitude dimension in the NetCDF file""" lat_name::AbstractString - """Function that has to be applied to the data to convert it to the same conventions - as CliMA""" + """Function that has to be applied to the data to convert it to different units""" preprocess_data_fn::Function """The NCDataset associated to the file""" @@ -75,9 +74,24 @@ struct SimDataSource """Simulation longitudes and latitudes""" lonlat::Tuple{AbstractArray, AbstractArray} + + """Function that has to be applied to the data to convert it to different units""" + preprocess_data_fn::Function + + # TODO: This should be handled by ClimaAnalysis + """preprocess_data_fn is typically used to change units, so we have to tell ClimaAnalysis what the new + units are.""" + new_units::Union{Nothing, AbstractString} end -function SimDataSource(; path, short_name, reduction = "average", period = "10d") +function SimDataSource(; + path, + short_name, + reduction = "average", + period = "10d", + preprocess_data_fn = identity, + new_units = nothing, +) sim = ClimaAnalysis.SimDir(path) # TODO: Add period, for the time-being, we just pick up what's there @@ -85,7 +99,7 @@ function SimDataSource(; path, short_name, reduction = "average", period = "10d" lonlat = (var.dims["lon"], var.dims["lat"]) - return SimDataSource(path, short_name, reduction, period, var, lonlat) + return SimDataSource(path, short_name, reduction, period, var, lonlat, preprocess_data_fn, new_units) end """ @@ -96,5 +110,5 @@ Return the simulation data at the given date. function data_at_date(sim_ds::SimDataSource, date::Dates.DateTime) start_date = Dates.DateTime(sim_ds.var.attributes["start_date"]) time_diff_seconds = (date - start_date) / Dates.Second(1) - return ClimaAnalysis.slice(sim_ds.var, time = time_diff_seconds).data + return sim_ds.preprocess_data_fn(ClimaAnalysis.slice(sim_ds.var, time = time_diff_seconds).data) end diff --git a/experiments/ClimaEarth/user_io/leaderboard/utils.jl b/experiments/ClimaEarth/user_io/leaderboard/utils.jl index fbefa52b4b..50609703b6 100644 --- a/experiments/ClimaEarth/user_io/leaderboard/utils.jl +++ b/experiments/ClimaEarth/user_io/leaderboard/utils.jl @@ -148,7 +148,7 @@ function bias(obs_ds::ObsDataSource, sim_ds::SimDataSource, target_dates::Abstra rmse = round(sqrt(integrate_on_sphere(mse_arr, lonlat)); sigdigits = 3) global_bias = round(integrate_on_sphere(bias_arr, lonlat); sigdigits = 3) - units = sim_ds.var.attributes["units"] + units = isnothing(sim_ds.new_units) ? sim_ds.var.attributes["units"] : sim_ds.new_units bias_attribs = Dict{String, Any}( "short_name" => "sim-obs_$short_name", diff --git a/test/experiment_tests/leaderboard.jl b/test/experiment_tests/leaderboard.jl index 1ff50d253a..de0e28bca1 100644 --- a/test/experiment_tests/leaderboard.jl +++ b/test/experiment_tests/leaderboard.jl @@ -53,16 +53,18 @@ end @testset "Leaderboard" begin simdir = ClimaAnalysis.SimDir(@__DIR__) - sim_datasource = Leaderboard.SimDataSource(path = @__DIR__, short_name = "pr") + preprocess_fn = (data) -> data .* Float32(-1 / 86400) + + # The conversion is technically not correct for this data source, but what + # we care about here is that preprocess_data_fn works + sim_datasource = Leaderboard.SimDataSource(path = @__DIR__, short_name = "pr", preprocess_data_fn = preprocess_fn) pr = get(simdir, "pr") @test sim_datasource.lonlat[1] == pr.dims["lon"] @test sim_datasource.lonlat[2] == pr.dims["lat"] - @test Leaderboard.data_at_date(sim_datasource, Dates.DateTime(1979, 1, 2)) == pr.data[1, :, :] - - preprocess_fn = (data) -> data .* Float32(-1 / 86400) + @test Leaderboard.data_at_date(sim_datasource, Dates.DateTime(1979, 1, 2)) == preprocess_fn(pr.data[1, :, :]) obs_datasource = Leaderboard.ObsDataSource(; path = joinpath(pr_obs_data_path(), "gpcp.precip.mon.mean.197901-202305.nc"),