From 7777e54eff332356d3da5d4e352ff29ece4003c1 Mon Sep 17 00:00:00 2001 From: Kevin Phan <98072684+ph-kev@users.noreply.github.com> Date: Thu, 3 Oct 2024 16:02:21 -0700 Subject: [PATCH] Refactor leaderboard code This commit refactors the leaderboard code using the new features from ClimaAnalysis. This commit also deletes the old leaderboard code and the tests for it. Also, there is an off by one month issue when handling the dates and seasons. This arises because the simulation data assigns the next month the monthly average for this month. For instance, the monthly average for January 2010 is assigned the date 2/1/2010. This is fixed in this commit. The commit also moves the code to leaderboard.jl and data_sources.jl. The file read_amip.jl calls leaderboard.jl to plot now. The conditional to run leaderboard.jl is changed from t_end > 86400 to t_end > 86400 * 31 * 3 since it doesn't make sense to run the code unless monthly averages are computed. This is because the dataset contains monthly averages. One significant difference is the leaderboard which now plot the best and worst single model using only annual rather than averaging the error over annual and seasonal data. --- docs/make.jl | 3 + docs/src/leaderboard.md | 50 +++ experiments/ClimaEarth/Project.toml | 2 +- .../ClimaEarth/leaderboard/data_sources.jl | 134 +++++++ .../ClimaEarth/leaderboard/leaderboard.jl | 163 +++++++++ experiments/ClimaEarth/run_amip.jl | 110 +----- experiments/ClimaEarth/user_io/leaderboard.jl | 1 - .../user_io/leaderboard/Leaderboard.jl | 18 - .../user_io/leaderboard/cmip_rmse.jl | 135 ------- .../user_io/leaderboard/compare_with_obs.jl | 329 ------------------ .../user_io/leaderboard/data_sources.jl | 127 ------- .../ClimaEarth/user_io/leaderboard/utils.jl | 240 ------------- test/experiment_tests/leaderboard.jl | 90 ----- test/runtests.jl | 5 +- 14 files changed, 356 insertions(+), 1051 deletions(-) create mode 100644 docs/src/leaderboard.md create mode 100644 experiments/ClimaEarth/leaderboard/data_sources.jl create mode 100644 experiments/ClimaEarth/leaderboard/leaderboard.jl delete mode 100644 experiments/ClimaEarth/user_io/leaderboard.jl delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/Leaderboard.jl delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/cmip_rmse.jl delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/data_sources.jl delete mode 100644 experiments/ClimaEarth/user_io/leaderboard/utils.jl delete mode 100644 test/experiment_tests/leaderboard.jl diff --git a/docs/make.jl b/docs/make.jl index c21ee0bc1..d1375f50d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -70,11 +70,14 @@ interface_pages = [ ] performance_pages = ["performance.md"] +output_pages = ["leaderboard.md"] + pages = Any[ "Home" => "index.md", "Examples" => experiment_pages, "Coupler Interface" => interface_pages, "Performance" => performance_pages, + "Model Output" => output_pages, ] diff --git a/docs/src/leaderboard.md b/docs/src/leaderboard.md new file mode 100644 index 000000000..d2f826556 --- /dev/null +++ b/docs/src/leaderboard.md @@ -0,0 +1,50 @@ +# Leaderboard + +## AMIP Driver + +### Add a new variable to compare against observations +Computing errors against observations are all contained in the `leaderboard` folder. The +files in the leaderboard folder are `data_sources.jl` and `leaderboard.jl`. Loading and +preprocessing variables of interest are done in `data_sources.jl` and computing the errors +and plotting are done in `leaderboard.jl`. To add a new variable, you ideally only need to +modify `data_sources.jl`. + +#### Add a new variable to the bias plots +If you want to add a new variable to the bias plots, you add the variable to `sim_var_dict`, +`obs_var_dict`, `compare_vars_biases_groups`, and optionally +`compare_vars_biases_plot_extrema`. + +The dictionaries `sim_var_dict` and `obs_var_dict` map short names to an anonymous function +that returns a [`OutputVar`](https://clima.github.io/ClimaAnalysis.jl/dev/var/). Both +dictionaries must use the same short names as the keys so that the right simulation and +observational data are compared. + +!!! tip "Preprocessing" + Observational and simulational data should be preprocessed for dates and units. For + simulation data, monthly averages correspond to the first day following the month. + For instance, the monthly average corresponding to January 2010 is on the date + 2/1/2010. Preprocessing is done to shift this date to 1/1/2010. When preprocessing + data, we follow the convention that the first day corresponds to the monthly average + for that month. For observational data, you should check the convention being followed + and preprocess the dates if necessary. + + For `obs_var_dict`, the anonymous function must take in a start date. The start date is + used in `leaderboard.jl` to adjust the seconds in the `OutputVar` to match between start + date in the simulation data. + + Units should be the same between the simulation and observational data. + +The variable `compare_vars_biases_groups` is an array of arrays of short names that control +which variables are plotted together. You can add the variable to an existing array or make +a new array. The dictionary `compare_vars_biases_plot_extrema` maps short names to tuples. +The dictionary sets the lower and upper bounds of the bias plots. + +#### Add a new variable to the leaderboard +If you want to add a new variable to the leaderboard, you add the variable to +`rmse_var_names` and `rmse_var_dict`. The array `rmse_var_names` is a list of short names. +The dictionary `rmse_var_dict` maps short name to +[`RMSEVariable`](https://clima.github.io/ClimaAnalysis.jl/dev/rmse_var/). A `RMSEVariable` +must be initialized for each variable of interest. The CliMA model is added with units to +the `RMSEVariable`. It is assumed that the `RMSEVariable` contains only the columns "DJF", +"MAM", "JJA", "SON", and "ANN" in that order. The file `leaderboard.jl` will load the +appropriate data into the `RMSEVariable`. diff --git a/experiments/ClimaEarth/Project.toml b/experiments/ClimaEarth/Project.toml index 0d57a91d1..47448e800 100644 --- a/experiments/ClimaEarth/Project.toml +++ b/experiments/ClimaEarth/Project.toml @@ -34,7 +34,7 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" [compat] ArgParse = "1.1" ArtifactWrappers = "0.2" -ClimaAnalysis = "0.5.4" +ClimaAnalysis = "0.5.10" ClimaAtmos = "0.27" ClimaCorePlots = "0.2" ClimaDiagnostics = "0.2" diff --git a/experiments/ClimaEarth/leaderboard/data_sources.jl b/experiments/ClimaEarth/leaderboard/data_sources.jl new file mode 100644 index 000000000..6644d9cf5 --- /dev/null +++ b/experiments/ClimaEarth/leaderboard/data_sources.jl @@ -0,0 +1,134 @@ +import ClimaAnalysis +import ClimaUtilities.ClimaArtifacts: @clima_artifact + +# For plotting bias plots +compare_vars_biases_plot_extrema = Dict( + "pr" => (-5.0, 5.0), + "rsdt" => (-2.0, 2.0), + "rsut" => (-50.0, 50.0), + "rlut" => (-50.0, 50.0), + "rsutcs" => (-20.0, 20.0), + "rlutcs" => (-20.0, 20.0), + "rsds" => (-50.0, 50.0), + "rsus" => (-10.0, 10.0), + "rlds" => (-50.0, 50.0), + "rlus" => (-50.0, 50.0), + "rsdscs" => (-10.0, 10.0), + "rsuscs" => (-10.0, 10.0), + "rldscs" => (-20.0, 20.0), +) + +# To add a variable to the bias plots, add the variable to sim_var_dict, obs_var_dict, +# compare_vars_biases_groups, and compare_vars_biases_plot_extrema +# To add a variable to the leaderboard, add the variable to rmse_var_names and rmse_var_dict + +# Dict for loading in simulation data +sim_var_dict = Dict{String, Any}( + "pr" => + () -> begin + sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = "pr") + sim_var = + ClimaAnalysis.convert_units(sim_var, "mm/day", conversion_function = x -> x .* Float32(-86400)) + sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + return sim_var + end, +) + +# Loop to load the rest of the simulation data +# Tuple of short names for loading simulation and observational data +sim_obs_short_names_no_pr = [ + ("rsdt", "solar_mon"), + ("rsut", "toa_sw_all_mon"), + ("rlut", "toa_lw_all_mon"), + ("rsutcs", "toa_sw_clr_t_mon"), + ("rlutcs", "toa_lw_clr_t_mon"), + ("rsds", "sfc_sw_down_all_mon"), + ("rsus", "sfc_sw_up_all_mon"), + ("rlds", "sfc_lw_down_all_mon"), + ("rlus", "sfc_lw_up_all_mon"), + ("rsdscs", "sfc_sw_down_clr_t_mon"), + ("rsuscs", "sfc_sw_up_clr_t_mon"), + ("rldscs", "sfc_lw_down_clr_t_mon"), +] + +# Add "pr" and the necessary preprocessing +for (short_name, _) in sim_obs_short_names_no_pr + sim_var_dict[short_name] = + () -> begin + sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = short_name) + sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + return sim_var + end +end + +# Dict for loading observational data +# Add "pr" and the necessary preprocessing +obs_var_dict = Dict{String, Any}( + "pr" => + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + joinpath(@clima_artifact("precipitation_obs"), "gpcp.precip.mon.mean.197901-202305.nc"), + "precip", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + return obs_var + end, +) + +# Loop to load the rest of the observational data and the necessary preprocessing +for (sim_name, obs_name) in sim_obs_short_names_no_pr + obs_var_dict[sim_name] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), + obs_name, + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + # Convert from W m-2 to W m^-2 + ClimaAnalysis.units(obs_var) == "W m-2" ? obs_var = ClimaAnalysis.set_units(obs_var, "W m^-2") : + error("Did not expect $(ClimaAnalysis.units(obs_var)) for the units") + return obs_var + end +end + +# Used to organize plots +compare_vars_biases_groups = [ + ["pr", "rsdt", "rsut", "rlut"], + ["rsds", "rsus", "rlds", "rlus"], + ["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"], +] + +# For plotting box plots and leaderboard + +# Load data into RMSEVariables +rmse_var_names = ["pr", "rsut", "rlut"] +rmse_var_pr = ClimaAnalysis.read_rmses( + joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv"), + "pr", + units = "mm / day", +) +rmse_var_rsut = ClimaAnalysis.read_rmses( + joinpath(@clima_artifact("cmip_model_rmse"), "rsut_rmse_amip_rsut_amip_5yr.csv"), + "rsut", + units = "W m^-2", +) +rmse_var_rlut = ClimaAnalysis.read_rmses( + joinpath(@clima_artifact("cmip_model_rmse"), "rlut_rmse_amip_rlut_amip_5yr.csv"), + "rlut", + units = "W m^-2", +) + +# Add models and units for CliMA +rmse_var_pr = ClimaAnalysis.add_model(rmse_var_pr, "CliMA") +ClimaAnalysis.add_unit!(rmse_var_pr, "CliMA", "mm / day") + +rmse_var_rsut = ClimaAnalysis.add_model(rmse_var_rsut, "CliMA") +ClimaAnalysis.add_unit!(rmse_var_rsut, "CliMA", "W m^-2") + +rmse_var_rlut = ClimaAnalysis.add_model(rmse_var_rlut, "CliMA") +ClimaAnalysis.add_unit!(rmse_var_rlut, "CliMA", "W m^-2") + +# Store the rmse vars in a dictionary +rmse_var_dict = Dict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut) diff --git a/experiments/ClimaEarth/leaderboard/leaderboard.jl b/experiments/ClimaEarth/leaderboard/leaderboard.jl new file mode 100644 index 000000000..f42887a2e --- /dev/null +++ b/experiments/ClimaEarth/leaderboard/leaderboard.jl @@ -0,0 +1,163 @@ +import ClimaAnalysis +import ClimaUtilities.ClimaArtifacts: @clima_artifact +import GeoMakie +import CairoMakie +import Dates + +include("data_sources.jl") + +""" + compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + +Plot the biases and a leaderboard of various variables. + +The paramter `leaderboard_base_path` is the path to save the leaderboards and bias plots and +`diagnostics_folder_path` is the path to the simulation data. +""" +function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + @info "Error against observations" + + # Set up dict for storing simulation and observational data after processing + sim_obs_comparsion_dict = Dict() + seasons = ["ANN", "MAM", "JJA", "SON", "DJF"] + + # Print dates for debugging + _, var_func = first(sim_var_dict) + var = var_func() + output_dates = Dates.DateTime(var.attributes["start_date"]) .+ Dates.Second.(ClimaAnalysis.times(var)) + @info "Working with dates:" + @info output_dates + + for short_name in keys(sim_var_dict) + # Simulation data + sim_var = sim_var_dict[short_name]() + + # Observational data + obs_var = obs_var_dict[short_name](sim_var.attributes["start_date"]) + + # Remove first spin_up_months from simulation + spin_up_months = 6 + spinup_cutoff = spin_up_months * 31 * 86400.0 + ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff && + (sim_var = ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff)) + + obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var) + obs_var_seasons = ClimaAnalysis.split_by_season(obs_var) + sim_var_seasons = ClimaAnalysis.split_by_season(sim_var) + + # Add annual to start of seasons + obs_var_seasons = (obs_var, obs_var_seasons...) + sim_var_seasons = (sim_var, sim_var_seasons...) + + # Take time average + obs_var_seasons = ( + !ClimaAnalysis.isempty(obs_var) ? ClimaAnalysis.average_time(obs_var) : obs_var for + obs_var in obs_var_seasons + ) + sim_var_seasons = ( + !ClimaAnalysis.isempty(sim_var) ? ClimaAnalysis.average_time(sim_var) : sim_var for + sim_var in sim_var_seasons + ) + + # Save observation and simulation data + sim_obs_comparsion_dict[short_name] = Dict( + season => (sim_var_s, obs_var_s) for + (season, sim_var_s, obs_var_s) in zip(seasons, sim_var_seasons, obs_var_seasons) + ) + end + + # Filter seasons to remove seasons with no dates + _, var = first(sim_obs_comparsion_dict) + filter!(season -> !ClimaAnalysis.isempty(var[season][1]), seasons) + + # Plot annual plots + for compare_vars_biases in compare_vars_biases_groups + fig_bias = CairoMakie.Figure(; size = (600, 300 * length(compare_vars_biases))) + for (row, short_name) in enumerate(compare_vars_biases) + sim_var = sim_obs_comparsion_dict[short_name]["ANN"][1] + if !ClimaAnalysis.isempty(sim_var) + # Add "mean " for plotting the title + sim_var.attributes["short_name"] = "mean $(ClimaAnalysis.short_name(sim_var))" + cmap_extrema = get(compare_vars_biases_plot_extrema, short_name) do + extrema(ClimaAnalysis.bias(sim_obs_comparsion_dict[short_name]["ANN"]...).data) + end + ClimaAnalysis.Visualize.plot_bias_on_globe!( + fig_bias, + sim_obs_comparsion_dict[short_name]["ANN"]..., + cmap_extrema = cmap_extrema, + p_loc = (row, 1), + ) + end + end + CairoMakie.save(joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_ANN.png"), fig_bias) + end + + # Plot plots with all the seasons (not annual) + seasons_no_ann = filter(season -> season != "ANN", seasons) + for compare_vars_biases in compare_vars_biases_groups + fig_all_seasons = CairoMakie.Figure(; size = (600 * length(seasons_no_ann), 300 * length(compare_vars_biases))) + for (col, season) in enumerate(seasons_no_ann) + # Plot at 2 * col - 1 because a bias plot takes up 1 x 2 space + CairoMakie.Label(fig_all_seasons[0, 2 * col - 1], season, tellwidth = false, fontsize = 30) + for (row, short_name) in enumerate(compare_vars_biases) + sim_var = sim_obs_comparsion_dict[short_name][season][1] + if !ClimaAnalysis.isempty(sim_var) + cmap_extrema = get(compare_vars_biases_plot_extrema, short_name) do + extrema(ClimaAnalysis.bias(sim_obs_comparsion_dict[short_name][season]...).data) + end + # Add "mean " for plotting the title + sim_var.attributes["short_name"] = "mean $(ClimaAnalysis.short_name(sim_var))" + ClimaAnalysis.Visualize.plot_bias_on_globe!( + fig_all_seasons, + sim_obs_comparsion_dict[short_name][season]..., + cmap_extrema = cmap_extrema, + p_loc = (row, 2 * col - 1), + ) + end + end + end + CairoMakie.save( + joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_all_seasons.png"), + fig_all_seasons, + ) + end + + # Add RMSE for the CliMA model and for each season + for short_name in rmse_var_names + for season in seasons + !ClimaAnalysis.isempty(sim_obs_comparsion_dict[short_name][season][1]) && ( + rmse_var_dict[short_name]["CliMA", season] = + ClimaAnalysis.global_rmse(sim_obs_comparsion_dict[short_name][season]...) + ) + end + end + + # Plot box plots + fig_leaderboard = CairoMakie.Figure(; size = (800, 300 * length(rmse_var_dict) + 400), fontsize = 20) + for (loc, short_name) in enumerate(rmse_var_names) + ClimaAnalysis.Visualize.plot_boxplot!( + fig_leaderboard, + rmse_var_dict[short_name], + ploc = (loc, 1), + best_and_worst_category_name = "ANN", + ) + end + + # Plot leaderboard + ClimaAnalysis.Visualize.plot_leaderboard!( + fig_leaderboard, + (rmse_var_dict[short_name] for short_name in rmse_var_names)..., + best_category_name = "ANN", + ploc = (length(rmse_var_dict) + 1, 1), + ) + CairoMakie.save(joinpath(leaderboard_base_path, "bias_leaderboard.png"), fig_leaderboard) +end + +if abspath(PROGRAM_FILE) == @__FILE__ + if length(ARGS) < 2 + error("Usage: julia leaderboard.jl ") + end + leaderboard_base_path = ARGS[begin] + diagnostics_folder_path = ARGS[begin + 1] + compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) +end diff --git a/experiments/ClimaEarth/run_amip.jl b/experiments/ClimaEarth/run_amip.jl index 2bb5fb71d..18d750572 100644 --- a/experiments/ClimaEarth/run_amip.jl +++ b/experiments/ClimaEarth/run_amip.jl @@ -925,116 +925,14 @@ if ClimaComms.iamroot(comms_ctx) files_root = ".monthly", output_dir = dir_paths.artifacts, ) - - ## Compare against observations - if t_end > 84600 && config_dict["output_default_diagnostics"] - @info "Error against observations" - include("user_io/leaderboard.jl") - ClimaAnalysis = Leaderboard.ClimaAnalysis - - compare_vars_biases_plot_extrema = Dict( - "pr" => (-5.0, 5.0), - "rsdt" => (-2.0, 2.0), - "rsut" => (-50.0, 50.0), - "rlut" => (-50.0, 50.0), - "rsutcs" => (-20.0, 20.0), - "rlutcs" => (-20.0, 20.0), - "rsds" => (-50.0, 50.0), - "rsus" => (-10.0, 10.0), - "rlds" => (-50.0, 50.0), - "rlus" => (-50.0, 50.0), - "rsdscs" => (-10.0, 10.0), - "rsuscs" => (-10.0, 10.0), - "rldscs" => (-20.0, 20.0), - ) - + # Check this because we only want monthly data for making plots + if t_end > 84600 * 31 * 3 && config_dict["output_default_diagnostics"] + include("leaderboard/leaderboard.jl") diagnostics_folder_path = atmos_sim.integrator.p.output_dir leaderboard_base_path = dir_paths.artifacts - - compare_vars_biases_groups = [ - ["pr", "rsdt", "rsut", "rlut"], - ["rsds", "rsus", "rlds", "rlus"], - ["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"], - ] - - function compute_biases(compare_vars_biases, dates) - if isempty(dates) - return map(x -> 0.0, compare_vars_biases) - else - return Leaderboard.compute_biases( - diagnostics_folder_path, - compare_vars_biases, - dates, - cmap_extrema = compare_vars_biases_plot_extrema, - ) - end - end - - function plot_biases(dates, biases, output_name) - isempty(dates) && return nothing - - output_path = joinpath(leaderboard_base_path, "bias_$(output_name).png") - Leaderboard.plot_biases(biases; output_path) - end - - first_var = get( - ClimaAnalysis.SimDir(diagnostics_folder_path), - short_name = first(first(compare_vars_biases_groups)), - ) - - diagnostics_times = ClimaAnalysis.times(first_var) - # Remove the first `spinup_months` months from the leaderboard - spinup_months = 6 - # The monthly average output is at the end of the month, so this is safe - spinup_cutoff = spinup_months * 31 * 86400.0 - if diagnostics_times[end] > spinup_cutoff - filter!(x -> x > spinup_cutoff, diagnostics_times) - end - - output_dates = Dates.DateTime(first_var.attributes["start_date"]) .+ Dates.Second.(diagnostics_times) - @info "Working with dates:" - @info output_dates - ## collect all days between cs.dates.date0 and cs.dates.date - MAM, JJA, SON, DJF = Leaderboard.split_by_season(output_dates) - - for compare_vars_biases in compare_vars_biases_groups - ann_biases = compute_biases(compare_vars_biases, output_dates) - plot_biases(output_dates, ann_biases, first(compare_vars_biases) * "_total") - - MAM_biases = compute_biases(compare_vars_biases, MAM) - plot_biases(MAM, MAM_biases, first(compare_vars_biases) * "_MAM") - JJA_biases = compute_biases(compare_vars_biases, JJA) - plot_biases(JJA, JJA_biases, first(compare_vars_biases) * "_JJA") - SON_biases = compute_biases(compare_vars_biases, SON) - plot_biases(SON, SON_biases, first(compare_vars_biases) * "_SON") - DJF_biases = compute_biases(compare_vars_biases, DJF) - plot_biases(DJF, DJF_biases, first(compare_vars_biases) * "_DJF") - end - - compare_vars_rmses = ["pr", "rsut", "rlut"] - - ann_biases = compute_biases(compare_vars_rmses, output_dates) - MAM_biases = compute_biases(compare_vars_rmses, MAM) - JJA_biases = compute_biases(compare_vars_rmses, JJA) - SON_biases = compute_biases(compare_vars_rmses, SON) - DJF_biases = compute_biases(compare_vars_rmses, DJF) - - rmses = map( - (index) -> Leaderboard.RMSEs(; - model_name = "CliMA", - ANN = ann_biases[index], - DJF = DJF_biases[index], - MAM = MAM_biases[index], - JJA = JJA_biases[index], - SON = SON_biases[index], - ), - 1:length(compare_vars_rmses), - ) - - Leaderboard.plot_leaderboard(rmses; output_path = joinpath(leaderboard_base_path, "bias_leaderboard.png")) + compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) end end - ## plot extra atmosphere diagnostics if specified if config_dict["ci_plots"] @info "Generating CI plots" diff --git a/experiments/ClimaEarth/user_io/leaderboard.jl b/experiments/ClimaEarth/user_io/leaderboard.jl deleted file mode 100644 index e3af41d01..000000000 --- a/experiments/ClimaEarth/user_io/leaderboard.jl +++ /dev/null @@ -1 +0,0 @@ -include("leaderboard/Leaderboard.jl") diff --git a/experiments/ClimaEarth/user_io/leaderboard/Leaderboard.jl b/experiments/ClimaEarth/user_io/leaderboard/Leaderboard.jl deleted file mode 100644 index 6dc73c9f3..000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/Leaderboard.jl +++ /dev/null @@ -1,18 +0,0 @@ -module Leaderboard - -import ClimaComms -import ClimaAnalysis -import Dates -import NCDatasets -import Interpolations -import CairoMakie -import GeoMakie -import ClimaCoupler -import Statistics: mean -import ClimaUtilities.ClimaArtifacts: @clima_artifact - -include("data_sources.jl") -include("utils.jl") -include("compare_with_obs.jl") - -end diff --git a/experiments/ClimaEarth/user_io/leaderboard/cmip_rmse.jl b/experiments/ClimaEarth/user_io/leaderboard/cmip_rmse.jl deleted file mode 100644 index 6f107bbf6..000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/cmip_rmse.jl +++ /dev/null @@ -1,135 +0,0 @@ -import Statistics: median, quantile - -const RMSE_FILE_PATHS = Dict() - -RMSE_FILE_PATHS["pr"] = joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv") -RMSE_FILE_PATHS["rsut"] = joinpath(@clima_artifact("cmip_model_rmse"), "rsut_rmse_amip_rsut_amip_5yr.csv") -RMSE_FILE_PATHS["rlut"] = joinpath(@clima_artifact("cmip_model_rmse"), "rlut_rmse_amip_rlut_amip_5yr.csv") - -short_names = ["pr", "rsut", "rlut"] -for short_name in short_names - open(RMSE_FILE_PATHS[short_name], "r") do io - # Skip the header - header = readline(io) - - # Process each line - for line in eachline(io) - # Split the line by comma - fields = split(line, ',') - model_name = fields[1] - DJF, MAM, JJA, SON, ANN = map(x -> parse(Float64, x), fields[2:end]) - - push!(OTHER_MODELS_RMSEs[short_name], RMSEs(; model_name, DJF, MAM, JJA, SON, ANN)) - end - end -end - -""" - best_single_model(RMSEs) - -Return the one model that has the overall smallest error. -""" -function best_single_model(RMSEs) - _, index = findmin(r -> abs.(values(r)), RMSEs) - return RMSEs[index] -end - -""" - worst_single_model(RMSEs) - -Return the one model that has the overall largest error. -""" -function worst_single_model(RMSEs) - _, index = findmax(r -> abs.(values(r)), RMSEs) - return RMSEs[index] -end - -""" - RMSE_stats(RMSEs) - -RMSEs is the dictionary OTHER_MODELS_RMSEs. - -Return: -- best single model -- worst single model -- "model" with all the medians -- "model" with all the best values -- "model" with all the worst values -""" -function RMSE_stats(dict_vecRMSEs) - stats = Dict() - # cumulative_error maps model_names with the total RMSE across metrics normalized by median(RMSE) - cumulative_error = Dict() - for (key, vecRMSEs) in dict_vecRMSEs - # Collect into vectors that we can process independently - all_values = stack(values.(vecRMSEs)) - ANN, DJF, JJA, MAM, SON = ntuple(i -> all_values[i, :], 5) - - median_model = RMSEs(; - model_name = "Median", - ANN = median(ANN), - DJF = median(DJF), - JJA = median(JJA), - MAM = median(MAM), - SON = median(SON), - ) - - worst_model = RMSEs(; - model_name = "Worst", - ANN = maximum(abs.(ANN)), - DJF = maximum(abs.(DJF)), - JJA = maximum(abs.(JJA)), - MAM = maximum(abs.(MAM)), - SON = maximum(abs.(SON)), - ) - - best_model = RMSEs(; - model_name = "Best", - ANN = minimum(abs.(ANN)), - DJF = minimum(abs.(DJF)), - JJA = minimum(abs.(JJA)), - MAM = minimum(abs.(MAM)), - SON = minimum(abs.(SON)), - ) - - quantile25 = RMSEs(; - model_name = "Quantile 0.25", - ANN = quantile(ANN, 0.25), - DJF = quantile(DJF, 0.25), - JJA = quantile(JJA, 0.25), - MAM = quantile(MAM, 0.25), - SON = quantile(SON, 0.25), - ) - - quantile75 = RMSEs(; - model_name = "Quantile 0.75", - ANN = quantile(ANN, 0.75), - DJF = quantile(DJF, 0.75), - JJA = quantile(JJA, 0.75), - MAM = quantile(MAM, 0.75), - SON = quantile(SON, 0.75), - ) - - for rmse in vecRMSEs - haskey(cumulative_error, cumulative_error) || (cumulative_error[rmse.model_name] = 0.0) - cumulative_error[rmse.model_name] += sum(values(rmse) ./ values(median_model)) - end - - stats[key] = (; - best_single_model = best_single_model(vecRMSEs), - worst_single_model = worst_single_model(vecRMSEs), - median_model, - worst_model, - best_model, - quantile25, - quantile75, - ) - end - - _, absolute_best_model = findmin(cumulative_error) - _, absolute_worst_model = findmax(cumulative_error) - - return (; stats, absolute_best_model, absolute_worst_model) -end - -const COMPARISON_RMSEs_STATS = RMSE_stats(OTHER_MODELS_RMSEs) diff --git a/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl b/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl deleted file mode 100644 index f93c4ba68..000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/compare_with_obs.jl +++ /dev/null @@ -1,329 +0,0 @@ -import CairoMakie - -const OBS_DS = Dict() -const SIM_DS_KWARGS = Dict() -const OTHER_MODELS_RMSEs = Dict() - -function preprocess_pr_fn(data) - # -1 kg/m/s2 -> 1 mm/day - return data .* Float32(-86400) -end - -function replace_nan(x; v = 0.0) - return map(x -> isnan(x) ? zero(x) : x, v) -end - -Base.@kwdef struct RMSEs - model_name::String - ANN::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 - DJF::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 - MAM::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 - JJA::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 - SON::Union{<:Real, ClimaAnalysis.OutputVar} = 0.0 -end - -function Base.values(r::RMSEs) - val_or_rmse(v::Real) = v - val_or_rmse(v::ClimaAnalysis.OutputVar) = v.attributes["rmse"] - - return val_or_rmse.([r.ANN, r.DJF, r.MAM, r.JJA, r.SON]) -end - -OBS_DS["pr"] = ObsDataSource(; - path = joinpath(@clima_artifact("precipitation_obs"), "gpcp.precip.mon.mean.197901-202305.nc"), - var_name = "precip", -) -SIM_DS_KWARGS["pr"] = (; preprocess_data_fn = preprocess_pr_fn, new_units = "mm / day") -OTHER_MODELS_RMSEs["pr"] = [] - -OBS_DS["rsut"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "toa_sw_all_mon", -) -SIM_DS_KWARGS["rsut"] = (;) -OTHER_MODELS_RMSEs["rsut"] = [] - -OBS_DS["rlut"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "toa_lw_all_mon", -) -SIM_DS_KWARGS["rlut"] = (;) -OTHER_MODELS_RMSEs["rlut"] = [] - -OBS_DS["rsdt"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "solar_mon", -) -SIM_DS_KWARGS["rsdt"] = (;) - -OBS_DS["rsutcs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "toa_sw_clr_t_mon", -) -SIM_DS_KWARGS["rsutcs"] = (;) - -OBS_DS["rlutcs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "toa_lw_clr_t_mon", -) -SIM_DS_KWARGS["rlutcs"] = (;) - -OBS_DS["rsus"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_sw_up_all_mon", -) -SIM_DS_KWARGS["rsus"] = (;) - -OBS_DS["rsds"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_sw_down_all_mon", -) -SIM_DS_KWARGS["rsds"] = (;) - -OBS_DS["rlus"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_lw_up_all_mon", -) -SIM_DS_KWARGS["rlus"] = (;) - -OBS_DS["rlds"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_lw_down_all_mon", -) -SIM_DS_KWARGS["rlds"] = (;) - -OBS_DS["rsdscs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_sw_down_clr_t_mon", -) -SIM_DS_KWARGS["rsdscs"] = (;) - -OBS_DS["rsuscs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_sw_up_clr_t_mon", -) -SIM_DS_KWARGS["rsuscs"] = (;) - -OBS_DS["rldscs"] = ObsDataSource(; - path = joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"), - var_name = "sfc_lw_down_clr_t_mon", -) -SIM_DS_KWARGS["rldscs"] = (;) - -include("cmip_rmse.jl") - -function bias(output_dir::AbstractString, short_name::AbstractString, target_dates::AbstractArray{<:Dates.DateTime}) - obs = OBS_DS[short_name] - sim = SimDataSource(; path = output_dir, short_name, SIM_DS_KWARGS[short_name]...) - return bias(obs, sim, target_dates) -end - -function compute_biases(output_dir, short_names, target_dates::AbstractArray{<:Dates.DateTime}; cmap_extrema = Dict()) - return map(short_names) do name - bias_outvar = bias(output_dir, name, target_dates) - # The attribute is used in plot_bias to fix the colormap - haskey(cmap_extrema, name) && (bias_outvar.attributes["cmap_extrema"] = cmap_extrema[name]) - return bias_outvar - end -end - - -# colors are sampled in [0,1], so define a function that linearly transforms x ∈ [lo, hi] to [0, 1]. -to_unitrange(x::Number, lo::Number, hi::Number) = (x - lo) / (hi - lo) - -""" - constrained_cmap(cols, lo, hi; [categorical=false], [rev=false], [mid=0]) - constrained_cmap(lo, hi; [categorical=false], [rev=false], [mid=0]) - -Constrain a colormap to a given range. - -Given a colormap implicitly defined in `± maximum(abs, (lo, hi))`, constrain it to the range [lo, hi]. -This is useful to ensure that a colormap which is desired to diverge symmetrically around zero maps -the same color intensity to the same magnitude. - -The second form is a convenience function that uses the `:redsblues` colormap. - -# Arguments -- `cols`: a vector of colors, or a ColorScheme -- `lo`: lower bound of the range -- `hi`: upper bound of the range - -# Keyword Arguments -- `mid`: midpoint of the range # TODO: test `mid` better -- `categorical`: flag for whether returned colormap should be categorical or continous -- `rev`: flag for whether to reverse the colormap - -# Returns -- `cmap::Makie.ColorGradient`: a colormap -""" -function constrained_cmap(cols::Vector, lo, hi; mid = 0, categorical = false, rev = false) - constrained_cmap(CairoMakie.Makie.ColorScheme(cols), lo, hi; mid, categorical, rev) -end -function constrained_cmap(cols::CairoMakie.Makie.ColorScheme, lo, hi; mid = 0, categorical = false, rev = false) - rev && (cols = reverse(cols)) # reverse colorscheme if requested, don't reverse below in `cgrad`. - absmax = maximum(abs, (lo, hi) .- mid) - # map lo, hi ∈ [-absmax, absmax] onto [0,1] to sample their corresponding colors - lo_m, hi_m = to_unitrange.((lo, hi) .- mid, -absmax, absmax) - # values on [0,1] where each color in cols is defined - colsvals = range(0, 1; length = length(cols)) - # filter colsvals, keep only values in [lo_m, hi_m] + the endpoints lo_m and hi_m. - filter_colsvals = filter(x -> lo_m <= x <= hi_m, unique([lo_m; colsvals; hi_m])) - # select colors in filtered range; interpolate new low and hi colors. - newcols = CairoMakie.Makie.get(cols, filter_colsvals) - # values on [0,1] where the new colors are defined - new_colsvals = to_unitrange.(filter_colsvals, lo_m, hi_m) - cmap = CairoMakie.cgrad(newcols, new_colsvals; categorical, rev = false) - return cmap -end - -function plot_biases(biases; output_path) - fig = CairoMakie.Figure(; size = (600, 300 * length(biases))) - loc = 1 - - for bias_var in biases - min_level, max_level = get(bias_var.attributes, "cmap_extrema", extrema(bias_var.data)) - - # Make sure that 0 is at the center - cmap = constrained_cmap(CairoMakie.cgrad(:vik).colors, min_level, max_level; categorical = true) - nlevels = 11 - # Offset so that it covers 0 - levels = collect(range(min_level, max_level, length = nlevels)) - offset = levels[argmin(abs.(levels))] - levels = levels .- offset - ticklabels = map(x -> string(round(x; digits = 0)), levels) - ticks = (levels, ticklabels) - - more_kwargs = Dict( - :plot => Dict(:colormap => cmap, :levels => levels, :extendhigh => :auto, :extendlow => :auto), - :cb => Dict(:ticks => ticks), - ) - - ClimaAnalysis.Visualize.contour2D_on_globe!(fig, bias_var; p_loc = (loc, 1), more_kwargs) - loc = loc + 1 - end - CairoMakie.save(output_path, fig) -end - -function plot_leaderboard(rmses; output_path) - fig = CairoMakie.Figure(; size = (800, 300 * length(rmses) + 400), fontsize = 20) - loc = 1 - - NUM_BOXES = 4 + 1 # 4 seasons and 1 annual - NUM_MODELS = 2 # CliMA vs best - - num_variables = length(rmses) - - var_names = map(r -> r.ANN.attributes["var_short_name"], rmses) - - # The square plot is at the very bottom - loc_squares = length(rmses) + 1 - ax_squares = CairoMakie.Axis( - fig[loc_squares, 1], - yticks = (1:num_variables, reverse(var_names)), - xticks = ([3, NUM_BOXES + 3], ["CliMA", "Best model"]), - aspect = NUM_BOXES * NUM_MODELS, - ) - ax_squares2 = CairoMakie.Axis( - fig[loc_squares, 1], - xaxisposition = :top, - xticks = (0.5:4.5, ["Ann", "DJF", "MAM", "JJA", "SON"]), - aspect = NUM_BOXES * NUM_MODELS, - ) - CairoMakie.hidespines!(ax_squares2) - CairoMakie.hideydecorations!(ax_squares2) - - # Preallocate the matrix for the squares, each row is (4 seasons + annual) x - # models compared, and there is one row per variable - squares = zeros(NUM_BOXES * NUM_MODELS, num_variables) - - (; absolute_best_model, absolute_worst_model) = COMPARISON_RMSEs_STATS - - for (var_num, rmse) in enumerate(rmses) - short_name = rmse.ANN.attributes["var_short_name"] - units = rmse.ANN.attributes["units"] - ax = CairoMakie.Axis( - fig[loc, 1], - ylabel = "$short_name [$units]", - xticks = (1:5, ["Ann", "DJF", "MAM", "JJA", "SON"]), - title = "Global RMSE $short_name [$units]", - ) - - # Against other models - - (; median_model) = COMPARISON_RMSEs_STATS.stats[short_name] - - best_single_model = first(filter(x -> x.model_name == absolute_best_model, OTHER_MODELS_RMSEs[short_name])) - worst_single_model = first(filter(x -> x.model_name == absolute_worst_model, OTHER_MODELS_RMSEs[short_name])) - - squares[begin:NUM_BOXES, end - var_num + 1] .= values(rmse) ./ values(median_model) - squares[(NUM_BOXES + 1):end, end - var_num + 1] .= values(best_single_model) ./ values(median_model) - - CairoMakie.scatter!( - ax, - 1:5, - values(median_model), - label = median_model.model_name, - color = :black, - marker = :hline, - markersize = 10, - visible = false, - ) - - categories = vcat(map(_ -> collect(1:5), 1:length(OTHER_MODELS_RMSEs[short_name]))...) - - CairoMakie.boxplot!( - ax, - categories, - vcat(values.(OTHER_MODELS_RMSEs[short_name])...); - whiskerwidth = 1, - width = 0.35, - mediancolor = :black, - color = :gray, - whiskerlinewidth = 1, - ) - - CairoMakie.scatter!(ax, 1:5, values(best_single_model), label = absolute_best_model) - CairoMakie.scatter!(ax, 1:5, values(worst_single_model), label = absolute_worst_model) - - # If we want to plot other models - # for model in OTHER_MODELS_RMSEs[short_name] - # CairoMakie.scatter!(ax, 1:5, values(model), marker = :hline) - # end - - CairoMakie.scatter!( - ax, - 1:5, - values(rmse), - label = rmse.model_name, - marker = :star5, - markersize = 20, - color = :green, - ) - - # Add a fake extra point to center the legend a little better - CairoMakie.scatter!(ax, [6.5], [0.1], markersize = 0.01) - CairoMakie.axislegend() - loc = loc + 1 - end - - colormap = CairoMakie.Reverse(:RdYlGn) - - # Now, the square plot - CairoMakie.heatmap!( - ax_squares, - squares, - colormap = colormap, - # Trick to exclude the zeros - lowclip = :white, - colorrange = (1e-10, maximum(squares)), - ) - CairoMakie.vlines!(ax_squares2, NUM_BOXES, color = :black, linewidth = 3.0) - CairoMakie.Colorbar( - fig[loc_squares, 2], - limits = extrema(squares), - label = "RMSE/median(RMSE)", - colormap = colormap, - ) - - CairoMakie.save(output_path, fig) -end diff --git a/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl b/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl deleted file mode 100644 index ce126a6df..000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/data_sources.jl +++ /dev/null @@ -1,127 +0,0 @@ -""" - A struct to describe some observation data that we want to compare against. -""" -struct ObsDataSource - - # NOTE: This struct is not concretely typed, but we don't care about performance here. - # We only care about beauty. - - """Path of the NetCDF file""" - path::AbstractString - - """Name of the variable of interest in the NetCDF file""" - var_name::AbstractString - - """Name of the time dimension in the NetCDF file""" - time_name::AbstractString - """Name of the longitude dimension in the NetCDF file""" - lon_name::AbstractString - """Name of the latitude dimension in the NetCDF file""" - lat_name::AbstractString - - """Function that has to be applied to the data to convert it to different units""" - preprocess_data_fn::Function - - """The NCDataset associated to the file""" - ncdataset::NCDatasets.NCDataset - - """Shift dates so that they are at the end of month?""" - shift_to_end_of_month::Bool -end - -function ObsDataSource(; - path, - var_name, - time_name = "time", - lon_name = "lon", - lat_name = "lat", - preprocess_data_fn = identity, - shift_to_end_of_month = true, -) - - ncdataset = NCDatasets.NCDataset(path) - - return ObsDataSource( - path, - var_name, - time_name, - lon_name, - lat_name, - preprocess_data_fn, - ncdataset, - shift_to_end_of_month, - ) -end - -""" - Base.close(ds::ObsDataSource) - -Close the file associated to `ds`. -""" -function Base.close(ds::ObsDataSource) - NCDatasets.close(ds.ncdataset) -end - -""" - A struct to describe some simulation data. -""" -struct SimDataSource - - # This struct is not concretely typed, but we don't care about performance here. We only - # care about beauty. - - """Path of the simulation output""" - path::AbstractString - - """Short name of the variable of interest""" - short_name::AbstractString - - """Reduction to consider""" - reduction::AbstractString - - """Period of the reduction (e.g., 1d, 30d)""" - period::AbstractString - - """ClimaAnalysis OutputVar""" - var::ClimaAnalysis.OutputVar - - """Simulation longitudes and latitudes""" - lonlat::Tuple{AbstractArray, AbstractArray} - - """Function that has to be applied to the data to convert it to different units""" - preprocess_data_fn::Function - - # TODO: This should be handled by ClimaAnalysis - """preprocess_data_fn is typically used to change units, so we have to tell ClimaAnalysis what the new - units are.""" - new_units::Union{Nothing, AbstractString} -end - -function SimDataSource(; - path, - short_name, - reduction = "average", - period = "10d", - preprocess_data_fn = identity, - new_units = nothing, -) - - sim = ClimaAnalysis.SimDir(path) - # TODO: Add period, for the time-being, we just pick up what's there - var = get(sim; short_name, reduction) - - lonlat = (var.dims["lon"], var.dims["lat"]) - - return SimDataSource(path, short_name, reduction, period, var, lonlat, preprocess_data_fn, new_units) -end - -""" - data_at_date(sim_ds::SimDataSource, date::Dates.DateTime) - -Return the simulation data at the given date. -""" -function data_at_date(sim_ds::SimDataSource, date::Dates.DateTime) - start_date = Dates.DateTime(sim_ds.var.attributes["start_date"]) - time_diff_seconds = (date - start_date) / Dates.Second(1) - return sim_ds.preprocess_data_fn(ClimaAnalysis.slice(sim_ds.var, time = time_diff_seconds).data) -end diff --git a/experiments/ClimaEarth/user_io/leaderboard/utils.jl b/experiments/ClimaEarth/user_io/leaderboard/utils.jl deleted file mode 100644 index bf6472f75..000000000 --- a/experiments/ClimaEarth/user_io/leaderboard/utils.jl +++ /dev/null @@ -1,240 +0,0 @@ -import OrderedCollections: OrderedDict - -""" - isequispaced(arr::Vector) - -Return whether the array is equispaced or not. -""" -function isequispaced(arr::Vector) - return all(diff(arr) .≈ arr[begin + 1] - arr[begin]) -end - -""" - resample( - data::AbstractArray, - src_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - dest_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Resample 2D `data` from `src_lonlat` to `dest_lonlat`. - -Note, "flat" boundary conditions are imposed for the outer edges. This should make sense for -most data sources (that are defined all over the globe but do not necessarily have points at -the poles). Be sure to check that it makes sense for your data. - -The resampling performed here is a 0th-order constant resampling. -""" - -function resample( - data::AbstractArray, - src_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - dest_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, -) - - # Interpolations.jl wants ranges, so we have to make ranges out of the given src_lonlat - # - # NOTE: We are assuming that lonlat are equispaced. Check this! - vec_to_range(v) = range(v[begin], v[end], length = length(v)) - src_lonlat_ranges = vec_to_range.(src_lonlat) - - itp = Interpolations.linear_interpolation( - src_lonlat_ranges, - data, - extrapolation_bc = (Interpolations.Periodic(), Interpolations.Flat()), - ) - - dest_lon, dest_lat = dest_lonlat - - return [itp(lon, lat) for lon in dest_lon, lat in dest_lat] -end - -""" - integration_weights(lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - -Compute the integration weights for a first-order spherical integration. -""" -function integration_weights(lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - lon, lat = lonlat - - # We are also assuming that they are in degrees - abs(maximum(lon)) <= π && error("longitude should be in degrees") - abs(maximum(lat)) <= π / 2 && error("latitude should be in degrees") - - isequispaced(lon) || error("Longitude is not equispaced") - isequispaced(lat) || error("Latitude is not equispaced") - - dlon_rad = deg2rad(abs(lon[begin + 1] - lon[begin])) - dlat_rad = deg2rad(abs(lat[begin + 1] - lat[begin])) - - return [cosd(lat1) * dlon_rad * dlat_rad for _ in lon, lat1 in lat] -end - -""" - integrate_on_sphere( - data::AbstractArray, - lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Integrate `data` on a sphere with a first-order scheme. `data` has to be discretized on -`lonlat`. -""" -function integrate_on_sphere(data::AbstractArray, lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - lon, lat = lonlat - size_data = size(data) - exp_size = (length(lon), length(lat)) - size_data == exp_size || error("Inconsistent dimensions $size_data != $exp_size") - return sum(data .* integration_weights(lonlat)) ./ sum(integration_weights(lonlat)) -end - -""" - mse( - data1::AbstractArray, - data2::AbstractArray, - lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Compute the 2D map of the mean-square error. -""" -function mse(data1::AbstractArray, data2::AbstractArray, lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - lon, lat = lonlat - size_data1 = size(data1) - size_data2 = size(data2) - exp_size = (length(lon), length(lat)) - size_data1 == exp_size || error("Inconsistent dimensions $size_data1 != $exp_size") - size_data1 == size_data2 || error("Inconsistent dimensions $size_data1 != $size_data2") - - return (data1 .- data2) .^ 2 -end - -""" - bias( - sim::AbstractArray, - obs::AbstractArray, - lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Compute the 2D map of the bias (simulated - observed). -""" -function bias(sim::AbstractArray, obs::AbstractArray, lonlat::Tuple{<:AbstractArray, <:AbstractArray}) - lon, lat = lonlat - size_sim = size(sim) - size_obs = size(obs) - exp_size = (length(lon), length(lat)) - size_sim == exp_size || error("Inconsistent dimensions $size_sim != $exp_size") - size_sim == size_obs || error("Inconsistent dimensions $size_obs != $size_data2") - - return sim .- obs -end - -""" - bias( - obs_ds::ObsDataSource, - sim_ds::SimDataSource, - target_dates::AbstractArray{<: Dates.DateTime}, - ) - -Compute the 2D map of the bias (simulated - observed) for data averaged over the given dates. - -The return value is a `ClimaAnalysis.OutputVar` with the computed `rmse` and `bias` among -its attributes. -""" -function bias(obs_ds::ObsDataSource, sim_ds::SimDataSource, target_dates::AbstractArray{<:Dates.DateTime}) - lonlat = sim_ds.lonlat - simulated_data = map(d -> data_at_date(sim_ds, d), target_dates) |> mean - observational_data = map(d -> find_and_resample(obs_ds, d, lonlat), target_dates) |> mean - - bias_arr = bias(simulated_data, observational_data, lonlat) - mse_arr = mse(simulated_data, observational_data, lonlat) - - short_name = ClimaAnalysis.short_name(sim_ds.var) - - bias_dims = OrderedDict("lon" => lonlat[1], "lat" => lonlat[2]) - bias_dim_attribs = Dict{String, Any}() - - rmse = round(sqrt(integrate_on_sphere(mse_arr, lonlat)); sigdigits = 3) - global_bias = round(integrate_on_sphere(bias_arr, lonlat); sigdigits = 3) - - units = isnothing(sim_ds.new_units) ? sim_ds.var.attributes["units"] : sim_ds.new_units - - bias_attribs = Dict{String, Any}( - "short_name" => "sim-obs_$short_name", - "var_short_name" => "$short_name", - "long_name" => "SIM - OBS mean $short_name\n(RMSE: $rmse $units, Global bias: $global_bias $units)", - "rmse" => rmse, - "bias" => global_bias, - "units" => units, - ) - - return ClimaAnalysis.OutputVar(bias_attribs, bias_dims, bias_dim_attribs, bias_arr) -end - -""" - integrate_on_sphere(var::ClimaAnalysis.OutputVar) - -Integrate the given `var` onto the sphere with a first order integration scheme. -""" -function integrate_on_sphere(var::ClimaAnalysis.OutputVar) - lonlat = (var.dims["lon"], var.dims["lat"]) - return integrate_on_sphere(var.data, lonlat) -end - -""" - find_and_resample( - observed_data::ObsDataSource, - date::Dates.DateTime, - dest_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, - ) - -Find the data corresponding to the given `date` in `observed_data` and resample it to -`dest_lonlat`. -""" -function find_and_resample( - observed_data::ObsDataSource, - date::Dates.DateTime, - dest_lonlat::Tuple{<:AbstractArray, <:AbstractArray}, -) - obs = observed_data - - available_times = obs.ncdataset[observed_data.time_name] - - if observed_data.shift_to_end_of_month - available_times = Dates.DateTime.(Dates.lastdayofmonth.(available_times)) - end - - time_index = ClimaAnalysis.Utils.nearest_index(available_times, date) - - # NOTE: We are hardcoding that the time index is the last one and that there are three - # dimensions! We should generalize this (the NetCDF file contains all the information to - # deduce this). - - data_arr = obs.preprocess_data_fn(obs.ncdataset[obs.var_name][:, :, time_index]) - - lon_arr = obs.ncdataset[obs.lon_name][:] - lat_arr = obs.ncdataset[obs.lat_name][:] - - return resample(data_arr, (lon_arr, lat_arr), dest_lonlat) -end - -""" - split_by_season(dates::AbstractArray{<: Dates.DateTime}) - -Take an array of dates and return 4 split into seasons. -""" -function split_by_season(dates::AbstractArray{<:Dates.DateTime}) - MAM, JJA, SON, DJF = - Vector{Dates.DateTime}(), Vector{Dates.DateTime}(), Vector{Dates.DateTime}(), Vector{Dates.DateTime}() - - for date in dates - if Dates.Month(3) <= Dates.Month(date) <= Dates.Month(5) - push!(MAM, date) - elseif Dates.Month(6) <= Dates.Month(date) <= Dates.Month(8) - push!(JJA, date) - elseif Dates.Month(9) <= Dates.Month(date) <= Dates.Month(11) - push!(SON, date) - else - push!(DJF, date) - end - end - - return (MAM, JJA, SON, DJF) -end diff --git a/test/experiment_tests/leaderboard.jl b/test/experiment_tests/leaderboard.jl deleted file mode 100644 index 03e55571f..000000000 --- a/test/experiment_tests/leaderboard.jl +++ /dev/null @@ -1,90 +0,0 @@ -import Test: @test, @testset, @test_throws -import ClimaComms -@static pkgversion(ClimaComms) >= v"0.6" && ClimaComms.@import_required_backends -import ClimaCoupler -import ClimaAnalysis -import ClimaUtilities.ClimaArtifacts: @clima_artifact -import Dates - -# Load file to test -include("../../experiments/ClimaEarth/user_io/leaderboard.jl") -# Data -include(joinpath(pkgdir(ClimaCoupler), "artifacts", "artifact_funcs.jl")) - -@testset "Leaderboard utils" begin - @test Leaderboard.isequispaced([1, 2, 3]) - @test !Leaderboard.isequispaced([1, 2, 4]) - - input_matrix = reshape(1.0:16, (4, 4)) - - @test Leaderboard.resample(input_matrix, (1.0:4, 1.0:4), ([2.8], [3.7]))[1] == 13.6 - - @test_throws ErrorException Leaderboard.integration_weights(([1.0], [10.0])) - @test_throws ErrorException Leaderboard.integration_weights(([10.0], [1.0])) - @test_throws ErrorException Leaderboard.integration_weights(([10.0, 11.0, 13.0], [10.0])) - @test_throws ErrorException Leaderboard.integration_weights(([10.0, 20.0], [10.0, 11.0, 13.0])) - - @test Leaderboard.integration_weights(([10.0, 20.0], [20.0, 35.0]))[1] ≈ deg2rad(10.0) * deg2rad(15.0) * cosd(20.0) - - @test Leaderboard.integrate_on_sphere(ones(361, 181), (collect(-180.0:1:180.0), collect(-90.0:1:90.0))) ≈ 1 - - @test_throws ErrorException Leaderboard.mse([1], [2, 3], ([1], [2])) - @test_throws ErrorException Leaderboard.mse([1, 2], [2, 3, 4], ([1], [2])) - - @test_throws ErrorException Leaderboard.bias([1], [2, 3], ([1], [2])) - @test_throws ErrorException Leaderboard.bias([1, 2], [2, 3, 4], ([1], [2])) - - dates = [ - Dates.DateTime(2015, 1, 13), - Dates.DateTime(2018, 2, 13), - Dates.DateTime(1981, 7, 6), - Dates.DateTime(1993, 11, 19), - Dates.DateTime(2040, 4, 1), - Dates.DateTime(2000, 8, 18), - ] - - expected_dates = ( - [Dates.DateTime(2040, 4, 1)], - [Dates.DateTime(1981, 7, 6), Dates.DateTime(2000, 8, 18)], - [Dates.DateTime(1993, 11, 19)], - [Dates.DateTime(2015, 1, 13), Dates.DateTime(2018, 2, 13)], - ) - - @test Leaderboard.split_by_season(dates) == expected_dates -end - -@testset "Leaderboard" begin - simdir = ClimaAnalysis.SimDir(@__DIR__) - - preprocess_fn = (data) -> data .* Float32(-1 / 86400) - - # The conversion is technically not correct for this data source, but what - # we care about here is that preprocess_data_fn works - sim_datasource = Leaderboard.SimDataSource(path = @__DIR__, short_name = "pr", preprocess_data_fn = preprocess_fn) - - pr = get(simdir, "pr") - - @test sim_datasource.lonlat[1] == pr.dims["lon"] - @test sim_datasource.lonlat[2] == pr.dims["lat"] - - @test Leaderboard.data_at_date(sim_datasource, Dates.DateTime(1979, 1, 2)) == preprocess_fn(pr.data[1, :, :]) - - obs_datasource = Leaderboard.ObsDataSource(; - path = joinpath(@clima_artifact("precipitation_obs"), "gpcp.precip.mon.mean.197901-202305.nc"), - var_name = "precip", - preprocess_data_fn = preprocess_fn, - ) - - lat = obs_datasource.ncdataset["lat"][20] - lon = obs_datasource.ncdataset["lon"][30] - - @test Leaderboard.find_and_resample(obs_datasource, Dates.DateTime(1979, 1, 5), ([lon], [lat]))[1] == - preprocess_fn(obs_datasource.ncdataset["precip"][30, 20, 1]) - - # A very weak test, to make sure we can call the function. - # We assume the key functions bias and rmse are tested elsewhere. - computed_bias = - Leaderboard.bias(obs_datasource, sim_datasource, [Dates.DateTime(1979, 1, 1), Dates.DateTime(1979, 1, 2)]) - @test haskey(computed_bias.attributes, "rmse") - @test haskey(computed_bias.attributes, "bias") -end diff --git a/test/runtests.jl b/test/runtests.jl index 7aab18475..a462ae293 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -50,10 +50,7 @@ end gpu_broken || @safetestset "experiment test: CoupledSims tests" begin include("experiment_tests/coupled_sims.jl") end -# This test fails because of the undownloadable artifact -# @safetestset "experiment test: Leaderboard" begin -# include("experiment_tests/leaderboard.jl") -# end + @safetestset "component model test: ClimaAtmos" begin include("component_model_tests/climaatmos_tests.jl") end