diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index cd25af4ae1..4cb27bb4f4 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -311,7 +311,8 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-AEROSOL | abs550aer, od550aer, od550aerStderr, od550lt1aer, od870aer, od870aerStderr (aero) | 2 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| ESACCI-CLOUD | clivi, clt, cltStderr, clwvi, lwp, rlut, rlutcs, rsut, rsutcs, rsdt, rlus, rsus, rsuscs (Amon) | 2 | NCL | +| ESACCI-CLOUD | clivi, clt, cltStderr, clwvi, lwp, rlut, rlutcs, rsut, rsutcs, rsdt, rlus, rsus, rsuscs (Amon), | 2 | Python | +| | clt, clwvi, cod (day) | 2 | | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-FIRE | burntArea (Lmon) | 2 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-CLOUD.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-CLOUD.yml new file mode 100644 index 0000000000..02b021f245 --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-CLOUD.yml @@ -0,0 +1,122 @@ +# CMORIZE ESA CCI CLOUD daily/monthly data +--- +start_year_monthly: 1982 +end_year_monthly: 2016 +start_year_daily: 2003 +end_year_daily: 2007 + +daily_data: False + +# Common global attributes for Cmorizer output +attributes: + dataset_id: ESACCI-CLOUD + version: 'v3.0-AVHRR' + tier: 2 + project_id: OBS6 + source: 'ESA CCI' + modeling_realm: sat + reference: 'esacci_cloud' + comment: '' + + +# Variables to cmorize +variables: + # daily data + clt_day: + short_name: clt + mip: day + raw: [cmask_desc, cmask_asc] + raw_units: '1' + file: '-ESACCI-L3U_CLOUD-CLD_MASKTYPE-AVHRR_*-fv3.0.nc' + clwvi_day: + short_name: clwvi + mip: CFday + raw: [cwp_desc, cwp_asc] + raw_units: g/m2 + file: '-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + cod_day: + short_name: cod + mip: day + raw: [cot_desc, cot_asc] + raw_units: 1 + file: '-ESACCI-L3U_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + + # monthly data + clt_mon: + short_name: clt + mip: Amon + raw: cfc + raw_units: '1' + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + cltStderr_mon: + short_name: cltStderr + mip: Amon + raw: cfc_unc + raw_units: '%' + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + lwp_mon: + short_name: lwp + mip: Amon + raw: lwp_allsky + raw_units: g/m2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + clivi_mon: + short_name: clivi + mip: Amon + raw: iwp_allsky + raw_units: g/m2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + clwvi_mon: + short_name: clwvi + mip: Amon + raw: iwp_allsky + raw_units: g/m2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + rlut_mon: + short_name: rlut + mip: Amon + raw: toa_lwup + raw_units: W m-2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + rlutcs_mon: + short_name: rlutcs + mip: Amon + raw: toa_lwup_clr + raw_units: W m-2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + rsut_mon: + short_name: rsut + mip: Amon + raw: 
toa_swup + raw_units: W m-2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + rsutcs_mon: + short_name: rsutcs + mip: Amon + raw: toa_swup_clr + raw_units: W m-2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + rsdt_mon: + short_name: rsdt + mip: Amon + raw: toa_swdn + raw_units: W m-2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + rlus_mon: + short_name: rlus + mip: Amon + raw: boa_lwup + raw_units: W m-2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + rsus_mon: + short_name: rsus + mip: Amon + raw: boa_swup + raw_units: W m-2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' + rsuscs_mon: + short_name: rsuscs + mip: Amon + raw: boa_swup_clr + raw_units: W m-2 + file: '-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-fv3.0.nc' diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_cloud.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_cloud.py index dd0a11923f..cb434334ac 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_cloud.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_cloud.py @@ -1,10 +1,14 @@ -"""Script to download ESACCI-CLOUD.""" +"""Script to download daily and monthly ESACCI-CLOUD data.""" +import logging from datetime import datetime from dateutil import relativedelta -from esmvaltool.cmorizers.data.downloaders.ftp import CCIDownloader +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader +from esmvaltool.cmorizers.data.utilities import read_cmor_config + +logger = logging.getLogger(__name__) def download_dataset( @@ -27,40 +31,129 @@ def download_dataset( overwrite : bool Overwrite already downloaded files """ + + start_date_day = False if start_date is None: start_date = datetime(1982, 1, 1) + start_date_day = datetime(2003, 1, 1) if end_date is None: - end_date = datetime(2016, 1, 1) + end_date = datetime(2016, 12, 31) loop_date = start_date - downloader = CCIDownloader( + downloader = WGetDownloader( config=config, dataset=dataset, dataset_info=dataset_info, overwrite=overwrite, ) - downloader.connect() - end_of_file = "ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_NOAA-12-fv3.0.nc" - filler_data = { - 1994: [ - f"AVHRR_NOAA_12/1994/199409-{end_of_file}", - f"AVHRR_NOAA_12/1994/199410-{end_of_file}", - f"AVHRR_NOAA_12/1994/199411-{end_of_file}", - f"AVHRR_NOAA_12/1994/199412-{end_of_file}", - ], - 1995: [ - f"AVHRR_NOAA_12/1995/199501-{end_of_file}", - ], - } + + # check if daily data needs to be downloaded + cmor_config = read_cmor_config(dataset) + daily_data = cmor_config["daily_data"] + if not daily_data: + logger.info( + 'If daily data needs to be downloaded change "daily_data" in the ' + 'cmor_config file to "True" ' + "(esmvaltool/cmorizers/data/cmor_config/ESACCI-CLOUD.yml)" + ) + + # Base paths for L3U (daily data) and L3C (monthly data) + base_path_l3u = ( + "https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/" + "CLD_PRODUCTS/v3.0/L3U/" + ) + base_path_l3c = ( + "https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/" + "CLD_PRODUCTS/v3.0/L3C/" + ) + + wget_options = [ + "-r", + "-e robots=off", # Ignore robots.txt + "--no-parent", # Don't ascend to the parent directory + '--reject="index.html"', # Reject any HTML files + ] while loop_date <= end_date: year = loop_date.year - downloader.set_cwd("version3/L3C/AVHRR-PM/v3.0") - for folder in downloader.list_folders(): - for year_folder in downloader.list_folders(folder): - if int(year_folder) == year: - downloader.download_year(f"{folder}/{year_folder}") - downloader.set_cwd("version3/L3C/AVHRR-AM/v3.0") 
- for extra_file in filler_data.get(year, []): - downloader.download_file(extra_file) - loop_date += relativedelta.relativedelta(years=1) + month = loop_date.month + date = f"{year}{month:02}" + + if datetime(1982, 1, 1) <= loop_date < datetime(1985, 2, 1): + sat_am = "" + sat_pm = "AVHRR-PM/AVHRR_NOAA-7/" + elif datetime(1985, 2, 1) <= loop_date < datetime(1988, 11, 1): + sat_am = "" + sat_pm = "AVHRR-PM/AVHRR_NOAA-9/" + elif datetime(1988, 11, 1) <= loop_date < datetime(1991, 9, 1): + sat_am = "" + sat_pm = "AVHRR-PM/AVHRR_NOAA-11/" + elif datetime(1991, 9, 1) <= loop_date < datetime(1994, 9, 1): + sat_am = "AVHRR-AM/AVHRR_NOAA-12/" + sat_pm = "AVHRR-PM/AVHRR_NOAA-11/" + elif datetime(1994, 9, 1) <= loop_date < datetime(1995, 2, 1): + sat_am = "AVHRR-AM/AVHRR_NOAA-12/" + sat_pm = "" + elif datetime(1995, 2, 1) <= loop_date < datetime(1999, 1, 1): + sat_am = "AVHRR-AM/AVHRR_NOAA-12/" + sat_pm = "AVHRR-PM/AVHRR_NOAA-14/" + elif datetime(1999, 1, 1) <= loop_date < datetime(2001, 4, 1): + sat_am = "AVHRR-AM/AVHRR_NOAA-15/" + sat_pm = "AVHRR-PM/AVHRR_NOAA-14/" + elif datetime(2001, 4, 1) <= loop_date < datetime(2002, 11, 1): + sat_am = "AVHRR-AM/AVHRR_NOAA-15/" + sat_pm = "AVHRR-PM/AVHRR_NOAA-16/" + elif datetime(2002, 11, 1) <= loop_date < datetime(2005, 9, 1): + sat_am = "AVHRR-AM/AVHRR_NOAA-17/" + sat_pm = "AVHRR-PM/AVHRR_NOAA-16/" + elif datetime(2005, 9, 1) <= loop_date < datetime(2007, 7, 1): + sat_am = "AVHRR-AM/AVHRR_NOAA-17/" + sat_pm = "AVHRR-PM/AVHRR_NOAA-18/" + elif datetime(2007, 7, 1) <= loop_date < datetime(2009, 6, 1): + sat_am = "AVHRR-AM/AVHRR_METOPA/" + sat_pm = "AVHRR-PM/AVHRR_NOAA-18/" + elif datetime(2009, 6, 1) <= loop_date < datetime(2017, 1, 1): + sat_am = "AVHRR-AM/AVHRR_METOPA/" + sat_pm = "AVHRR-PM/AVHRR_NOAA-19/" + else: + msg = f"Data for this date {date} is not available" + raise ValueError(msg) + + # Download monthly data from L3C + for sat in (sat_am, sat_pm): + if sat != "": + # monthly data + logger.info("Downloading monthly data (L3C) for sat = %s", sat) + folder_l3c = base_path_l3c + sat + f"{year}/" + wget_options_l3c = wget_options.copy() + wget_options_l3c.append(f"--accept={date}*.nc") + logger.info( + "Download folder for monthly data (L3C): %s", folder_l3c + ) + downloader.download_file(folder_l3c, wget_options_l3c) + + # daily data + if daily_data: + if not start_date_day or ( + start_date_day + and datetime(2003, 1, 1) + <= loop_date + <= datetime(2007, 2, 1) + ): + logger.info( + "Downloading daily data (L3U) for sat = %s", sat + ) + folder_l3u = base_path_l3u + sat + f"{year}/{month:02}" + wget_options_l3u = wget_options.copy() + wget_options_l3u.append( + f"--accept={date}*CLD_MASKTYPE*.nc," + f"{date}*CLD_PRODUCTS*.nc" + ) + logger.info( + "Download folder for daily data (L3U): %s", + folder_l3u, + ) + downloader.download_file(folder_l3u, wget_options_l3u) + + # Increment the loop_date by one month + loop_date += relativedelta.relativedelta(months=1) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.ncl b/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.ncl deleted file mode 100644 index b19ae0b865..0000000000 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.ncl +++ /dev/null @@ -1,230 +0,0 @@ -; ############################################################################# -; ESMValTool CMORizer for ESACCI-CLOUD data -; ############################################################################# -; -; Tier -; Tier 2: other freely-available dataset. 
-; -; Source -; https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/CLD_PRODUCTS/v3.0/ -; -; Last access -; 20210428 -; -; Download and processing instructions -; Download the data from: -; L3C/AVHRR-AM/ and L3C/AVHRR-PM, e.g.: -; wget -r -nH -e robots=off --cut-dirs=9 --no-parent -; --reject="index.html*" -; https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/ -; CLD_PRODUCTS/v3.0/L3C/AVHRR-AM/ -; wget -r -nH -e robots=off --cut-dirs=9 --no-parent -; --reject="index.html*" -; https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/ -; CLD_PRODUCTS/v3.0/L3C/AVHRR-PM/ -; -; All files are expected in a single directory (no subdirectories -; with years). -; -; Modification history -; 20230818-lauer_axel: added output of clwvi (in addition to iwp, lwp) -; 20210428-lauer_axel: AVHRR-AM and AVHRR-PM data are now averaged during -; the overlapping time; TOA radiative fluxes are now -; also processed -; 20190201-righi_mattia: adapted to v2. -; 20181116-lauer_axel: changed to use CLOUD-CCI v3.0 data (AVHRR-PM), gaps -; (1994/09 - 1995/01) are filled with AVHRR-AM data -; 20180522-righi_mattia: changed to use AVHRR-PM data. -; 20160519-laue_axel: written (reformat_obs_ESACCI-AEROSOL.ncl). -; -; ############################################################################# -loadscript(getenv("esmvaltool_root") + \ - "/data/formatters/interface.ncl") - -begin - - ; Script name (for logger) - DIAG_SCRIPT = "esacci_cloud.ncl" - - ; Source name - OBSNAME = "ESACCI-CLOUD" - - ; Tier - TIER = 2 - - ; Period - YEAR1 = get_year(start_year, 1982) - YEAR2 = get_year(end_year, 2016) - - ; Selected variable (standard name) - VAR = (/"clt", "cltStderr", "clivi", "lwp", "clwvi", "rlut", "rlutcs", \ - "rsut", "rsutcs", "rsdt", "rlus", "rsus", "rsuscs"/) - - ; Name in the raw data - NAME = (/"cfc", "cfc_unc", "iwp_allsky", "lwp_allsky", "iwp_allsky", \ - "toa_lwup", "toa_lwup_clr", "toa_swup", "toa_swup_clr", \ - "toa_swdn", "boa_lwup", "boa_swup", "boa_swup_clr"/) - - ; Conversion factor - CONV = (/100., 1., 0.001, 0.001, 0.001, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, \ - 1.0, 1.0/) - - ; MIP - MIP = (/"Amon", "Amon", "Amon", "Amon", "Amon", "Amon", "Amon", "Amon", \ - "Amon", "Amon", "Amon", "Amon", "Amon"/) - - ; Frequency - FREQ = (/"mon", "mon", "mon", "mon", "mon", "mon", "mon", "mon", "mon", \ - "mon", "mon", "mon", "mon"/) - - ; CMOR table - CMOR_TABLE = getenv("cmor_tables") + \ - (/"/cmip5/Tables/CMIP5_Amon", \ - "/custom/CMOR_cltStderr.dat", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/custom/CMOR_lwp.dat", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon"/) - - ; Type - TYPE = "sat" - - ; Version - VERSION = "AVHRR-AMPM-fv3.0" - - ; Global attributes - SOURCE = "https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/" + \ - "CLD_PRODUCTS/v3.0/" - REF = \ - "Stengel et al., Earth Syst. Sci. 
Data, doi:10.5194/essd-9-881-2017, 2017" - COMMENT = "" - -end - -begin - - firstime = True - - do vv = 0, dimsizes(VAR) - 1 - - log_info("Processing " + VAR(vv) + " (" + MIP(vv) + ")") - - time = create_timec(YEAR1, YEAR2) - date = cd_calendar(time, 1) - - ; Create timeseries - do yy = YEAR1, YEAR2 - - syear = sprinti("%i", yy) - do mm = 1, 12 - - smonth = sprinti("%0.2i", mm) - - ; Read file - fname = systemfunc("ls " + input_dir_path + syear + smonth + \ - "-ESACCI-L3C_CLOUD-CLD_PRODUCTS-AVHRR_*-" + \ - str_sub_str(VERSION, "AVHRR-AMPM-", "") + ".nc") - - ; No files found - if (all(ismissing(fname))) then - continue - end if - - ifiles = dimsizes(fname) - log_info(syear + smonth + ": " + tostring(ifiles) + " input file(s)") - - do i = 0, ifiles - 1 - ; Extract data - f = addfile(fname(i), "r") - xx = f->$NAME(vv)$ - - if (i .eq. 0) then - xdims = array_append_record((/ifiles/), dimsizes(xx), 0) - xx_all = new(xdims, float) - delete(xdims) - end if - - ; Convert units - xx_all(i, :, :, :) = xx * CONV(vv) - - ; *** calculate clwvi (lwp + iwp) *** - if (VAR(vv) .eq. "clwvi") then - xx2 = f->lwp_allsky * 0.001 - xx_all(i, :, :, :) = xx_all(i, :, :, :) + xx2 - delete(xx2) - end if - - if (firstime) then - lat = f->lat - lon = f->lon - firstime = False - end if - delete(f) - end do ; loop over all files (am/pm) per date - - delete(fname) - delete(xx) - - xx = dim_avg_n(xx_all, 0) ; ignore missing values - delete(xx_all) - - ; Assign to global array - if (.not.isdefined("output")) then - dims = dimsizes(xx) - dims(0) = dimsizes(time) - output = new(dims, float) - output!0 = "time" - output&time = time - output!1 = "lat" - output&lat = lat - output!2 = "lon" - output&lon = lon - end if - output(ind(toint(yy * 100 + mm).eq.date), :, :) = (/xx/) - - end do - end do - - ; Set fill value - output = where(output.eq.-999, output@_FillValue, output) - - ; Format coordinates - output!0 = "time" - output!1 = "lat" - output!2 = "lon" - format_coords(output, YEAR1 + "0101", YEAR2 + "1231", FREQ(vv)) - - ; Set variable attributes - tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) - delete(output) - output = tmp - delete(tmp) - - ; Calculate coordinate bounds - bounds = guess_coord_bounds(output, FREQ(vv)) - - ; Set global attributes - gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF, COMMENT) - - ; Output file - DATESTR = YEAR1 + "01-" + YEAR2 + "12" - fout = output_dir_path + \ - str_join((/"OBS", OBSNAME, TYPE, VERSION, \ - MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" - - ; Write variable - write_nc(fout, VAR(vv), output, bounds, gAtt) - delete(gAtt) - delete(output) - delete(bounds) - - end do - -end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py new file mode 100644 index 0000000000..c40801ffc6 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_cloud.py @@ -0,0 +1,441 @@ +"""ESMValTool CMORizer for ESACCI-CLOUD data. + +Tier + Tier 2: other freely-available dataset. 
+ +Source + https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/CLD_PRODUCTS/v3.0/L3U/AVHRR-PM/ + +Last access + 20230619 + +Download and processing instructions + see downloading script +""" + +import copy +import glob +import logging +import os +from calendar import monthrange +from datetime import datetime + +import cf_units +import iris +import numpy as np +from dask import array as da +from dateutil import relativedelta +from esmvalcore.preprocessor import ( + daily_statistics, + monthly_statistics, + regrid, +) +from iris import NameConstraint + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + + +def _create_nan_cube(cube, year, month, day): + """Create cube containing only nan from existing cube.""" + nan_cube = cube.copy() + nan_cube.data = da.ma.masked_greater(cube.core_data(), -1e20) + + # Read dataset time unit and calendar from file + dataset_time_unit = str(nan_cube.coord("time").units) + dataset_time_calender = nan_cube.coord("time").units.calendar + newtime = datetime( + year=year, + month=month, + day=day, + hour=12, + minute=0, + second=0, + microsecond=0, + ) + newtime_num = cf_units.date2num( + newtime, dataset_time_unit, dataset_time_calender + ) + nan_cube.coord("time").points = float(newtime_num) + + return nan_cube + + +def _handle_missing_day(year, month, iday, short_name, cubes, cubes_day): + """Fill missing day.""" + daily_cube = _create_nan_cube(cubes_day[0], year, month, iday) + if short_name in ["clt", "ctp"]: + cubes.append(daily_cube) + cubes_day.append(daily_cube) + + +def _check_for_missing_months(cube, cubes): + """Check for dates which are missing in the cube and fill with NaNs.""" + time_points_array = cube.coord("time").units.num2date( + cube.coord("time").points + ) + loop_date = datetime( + time_points_array[0].year, time_points_array[0].month, 1 + ) + while loop_date <= datetime(time_points_array[-1].year, 12, 1): + if not any([time == loop_date for time in time_points_array]): + logger.debug( + "No data available for %d/%d", loop_date.month, loop_date.year + ) + nan_cube = cubes[0].copy( + np.ma.masked_invalid( + np.full(cubes[0].shape, np.nan, dtype=cubes[0].dtype) + ) + ) + nan_cube.coord("time").points = float( + nan_cube.coord("time").units.date2num(loop_date) + ) + nan_cube.coord("time").bounds = None + cubes.append(nan_cube) + loop_date += relativedelta.relativedelta(months=1) + + cube = cubes.concatenate_cube() + return cube + + +def _concatenate_and_save_daily_cubes( + short_name, var, cubes, cubes_day, out_dir, cfg, cmor_info +): + """Concatinate and save yearly cubes.""" + # Calc daily + # All data points + if short_name in ["clt", "ctp"]: + cube = cubes.concatenate_cube() + cube = daily_statistics(cube) + cube.coord("time").points = [ + int(tpoint) + 0.5 for tpoint in cube.coord("time").points + ] + # Regridding from 0.05x0.05 to 0.5x0.5 + cube = regrid(cube, target_grid="0.5x0.5", scheme="area_weighted") + # Fix units + if short_name == "clt": + cube.data = 100 * cube.core_data() + else: + if "raw_units" in var: + cube.units = var["raw_units"] + cube.convert_units(cmor_info.units) + # Fix metadata and update version information + cube = utils.fix_coords(cube) + utils.fix_var_metadata(cube, cmor_info) + attrs = copy.deepcopy(cfg["attributes"]) + attrs["mip"] = var["mip"] + attrs["version"] += "-AMPM" + utils.set_global_atts(cube, attrs) + # Save variable + utils.save_variable( + cube, short_name, out_dir, attrs, unlimited_dimensions=["time"] + ) + + # Data points only when daylight + 
cube_day = cubes_day.concatenate_cube() + cube_day = daily_statistics(cube_day) + cube_day.coord("time").points = [ + int(tpoint) + 0.5 for tpoint in cube_day.coord("time").points + ] + # Regridding from 0.05x0.05 to 0.5x0.5 + cube_day = regrid(cube_day, target_grid="0.5x0.5", scheme="area_weighted") + # Fix units + if short_name == "clt": + cube_day.data = 100 * cube_day.core_data() + else: + if "raw_units" in var: + cube_day.units = var["raw_units"] + cube_day.convert_units(cmor_info.units) + # Fix metadata and update version information + cube_day = utils.fix_coords(cube_day) + utils.fix_var_metadata(cube_day, cmor_info) + attrs_day = copy.deepcopy(cfg["attributes"]) + attrs_day["mip"] = var["mip"] + attrs_day["version"] += "-AMPM-daylight" + utils.set_global_atts(cube_day, attrs_day) + # Save variable + utils.save_variable( + cube_day, + short_name, + out_dir, + attrs_day, + unlimited_dimensions=["time"], + ) + + +def _concatenate_and_save_monthly_cubes( + short_name, var, cubes, out_dir, attach, cfg, cmor_info +): + """Concatinate monthly files and save.""" + # After gathering all cubes for all years, concatenate them + cube = cubes.concatenate_cube() + + # Check for missing months + cube = _check_for_missing_months(cube, cubes) + + if attach == "-AMPM": + cube = monthly_statistics(cube) + + # Regrid the cube to the target grid (e.g., 0.5x0.5) + cube = regrid(cube, target_grid="0.5x0.5", scheme="area_weighted") + + # Fix units and handle any special cases like 'clt' + if short_name == "clt": + cube.data = 100 * cube.core_data() + else: + if "raw_units" in var: + cube.units = var["raw_units"] + cube.convert_units(cmor_info.units) + + # Set global attributes and fix metadata + utils.fix_var_metadata(cube, cmor_info) + attrs = copy.deepcopy(cfg["attributes"]) + attrs["mip"] = var["mip"] + attrs["version"] += attach + utils.set_global_atts(cube, attrs) + + # Save the processed variable + utils.save_variable( + cube, short_name, out_dir, attrs, unlimited_dimensions=["time"] + ) + + +def _process_daily_file( + ifile, inum, short_name, var, cmor_info, cubes, cubes_day +): + """Extract variable from daily file.""" + logger.info("CMORizing file %s", ifile) + # Extract raw names from the variable dictionary + raw_var = var.get("raw", short_name) + + for ivar, raw_name in enumerate(raw_var): + logger.info("Extracting raw variable %s", raw_name) + + # Define variable for daylight + if "_asc" in raw_name: + illum = "illum_asc" + else: + illum = "illum_desc" + + # Load cube using a constraint based on the raw_name + daily_cube = iris.load_cube(ifile, NameConstraint(var_name=raw_name)) + daily_cube_ilum = iris.load_cube(ifile, NameConstraint(var_name=illum)) + + # Set arbitrary time of day (in the end a daily mean is calculated) + daily_cube.coord("time").points = ( + daily_cube.coord("time").points + (inum + 0.5 * ivar) * 0.1 + ) + daily_cube.attributes.clear() + daily_cube.coord("time").long_name = "time" + + # Fix coordinates + daily_cube = utils.fix_coords(daily_cube) + # Fix dtype + utils.fix_dtype(daily_cube) + # Fix metadata + utils.fix_var_metadata(daily_cube, cmor_info) + + # Check for daylight + daily_cube_day = daily_cube.copy() + daily_cube_day.data = da.ma.masked_where( + daily_cube_ilum.core_data() > 1, daily_cube_day.core_data() + ) + + if short_name in ["clt", "ctp"]: + cubes.append(daily_cube) + cubes_day.append(daily_cube_day) + + +def _process_monthly_file( + ifile, short_name, var, cmor_info, cubes_am, cubes_pm +): + """Extract variable from monthly file.""" + 
logger.info("CMORizing file %s for variable %s", ifile, short_name) + # Extract raw names from the variable dictionary + raw_name = var.get("raw", short_name) + # Try to load the cube using a constraint based on the raw_name + monthly_cube = iris.load_cube(ifile, NameConstraint(var_name=raw_name)) + + if short_name == "clwvi": + logger.info("Adding lwp and clivi") + cube_lwp = iris.load_cube(ifile, NameConstraint(var_name="lwp_allsky")) + monthly_cube.data = monthly_cube.core_data() + cube_lwp.core_data() + + if monthly_cube is None: + logger.warning("Cube could not be loaded for file '%s'", ifile) + return # Skip this file and move to the next + + monthly_cube.attributes.clear() + monthly_cube.coord("time").long_name = "time" + + # Fix coordinates + monthly_cube = utils.fix_coords(monthly_cube) + # Fix data type + utils.fix_dtype(monthly_cube) + # Fix metadata + utils.fix_var_metadata(monthly_cube, cmor_info) + + # Add the cube to the list + if any( + sat_am in ifile + for sat_am in ( + "AVHRR_NOAA-12", + "AVHRR_NOAA-15", + "AVHRR_NOAA-17", + "AVHRR_METOPA", + ) + ): + cubes_am.append(monthly_cube) + elif any( + sat_pm in ifile + for sat_pm in ( + "AVHRR_NOAA-7", + "AVHRR_NOAA-9", + "AVHRR_NOAA-11", + "AVHRR_NOAA-14", + "AVHRR_NOAA-16", + "AVHRR_NOAA-18", + "AVHRR_NOAA-19", + ) + ): + cubes_pm.append(monthly_cube) + else: + raise ValueError(f"The file {ifile} is not assigned to AM or PM") + + +def _extract_variable_daily( + short_name, var, cfg, in_dir, out_dir, start_date, end_date +): + """Extract daily variable.""" + cmor_info = cfg["cmor_table"].get_variable(var["mip"], short_name) + + if not start_date: + start_date = datetime(cfg["start_year_daily"], 1, 1) + if not end_date: + end_date = datetime(cfg["end_year_daily"], 12, 31) + + for year in range(start_date.year, end_date.year + 1): + # check if data is available + filelist = glob.glob(os.path.join(in_dir, f"{year}*{var['file']}")) + if not filelist: + raise ValueError(f"No daily data available for year {year}") + + cubes = iris.cube.CubeList() + cubes_day = iris.cube.CubeList() + + for month in range(1, 13): + num_days = monthrange(year, month)[1] + for iday in range(1, num_days + 1): + filelist = glob.glob( + os.path.join( + in_dir, + f"{year}{month:02}{iday:02}{var['file']}", + ) + ) + if filelist: + for inum, ifile in enumerate(filelist): + _process_daily_file( + ifile, + inum, + short_name, + var, + cmor_info, + cubes, + cubes_day, + ) + else: + logger.info( + f"No data available for day {year}-{month:02}-{iday:02}" + ) + _handle_missing_day( + year, month, iday, short_name, cubes, cubes_day + ) + + _concatenate_and_save_daily_cubes( + short_name, var, cubes, cubes_day, out_dir, cfg, cmor_info + ) + + +def _extract_variable_monthly( + short_name, var, cfg, in_dir, out_dir, start_date, end_date +): + """Extract monthly variable with improved handling for multiple cubes.""" + cmor_info = cfg["cmor_table"].get_variable(var["mip"], short_name) + + if not start_date: + start_date = datetime(cfg["start_year_monthly"], 1, 1) + if not end_date: + end_date = datetime(cfg["end_year_monthly"], 12, 31) + + cubes_am = iris.cube.CubeList() + cubes_pm = iris.cube.CubeList() + + for year in range(start_date.year, end_date.year + 1): + for month in range(1, 13): # Loop through all months (1-12) + # Construct the file list for the current month + filelist = glob.glob( + os.path.join(in_dir, f"{year}{month:02}{var['file']}") + ) + + if not filelist: + raise ValueError( + f"No monthly file found for {year}-{month:02}" + ) + + for ifile in 
filelist: + _process_monthly_file( + ifile, short_name, var, cmor_info, cubes_am, cubes_pm + ) + + if cubes_am: + _concatenate_and_save_monthly_cubes( + short_name, var, cubes_am, out_dir, "-AM", cfg, cmor_info + ) + + if cubes_pm: + _concatenate_and_save_monthly_cubes( + short_name, var, cubes_pm, out_dir, "-PM", cfg, cmor_info + ) + + if cubes_am and cubes_pm: + # change day value in cubes_pm for concatinating + for cube in cubes_am: + time_coord = cube.coord("time") + new_time_points = [tpoint + 1 for tpoint in time_coord.points] + time_coord.points = new_time_points + + cubes_combined = cubes_am + cubes_pm + _concatenate_and_save_monthly_cubes( + short_name, var, cubes_combined, out_dir, "-AMPM", cfg, cmor_info + ) + + if not (cubes_am or cubes_pm): + raise ValueError("No valid cubes processed") + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """CMORization function call.""" + # Run the cmorization + for var_name, var in cfg["variables"].items(): + short_name = var["short_name"] + logger.info("CMORizing variable '%s'", var_name) + if "L3U" in var["file"]: + if cfg["daily_data"]: + _extract_variable_daily( + short_name, var, cfg, in_dir, out_dir, start_date, end_date + ) + else: + logger.info( + 'If daily data needs to be formatted change "daily_data" in the ' + 'cmor_config file to "True" ' + "(esmvaltool/cmorizers/data/cmor_config/ESACCI-CLOUD.yml)" + ) + elif "L3C" in var["file"]: + _extract_variable_monthly( + short_name, var, cfg, in_dir, out_dir, start_date, end_date + ) + else: + raise ValueError( + "Filename cannot be assigned to monthly or daily data." + ) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index f33372c73f..a31cc1cf50 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -205,22 +205,54 @@ diagnostics: ESACCI-CLOUD: description: ESACCI-CLOUD check variables: - clivi: + clivi: &var_mon + mip: Amon + start_year: 1982 + end_year: 2016 + additional_datasets: + - {dataset: ESACCI-CLOUD, project: OBS6, tier: 2, type: sat, version: v3.0-AVHRR-AMPM} + - {dataset: ESACCI-CLOUD, project: OBS6, tier: 2, type: sat, version: v3.0-AVHRR-AM, start_year: 1992} + - {dataset: ESACCI-CLOUD, project: OBS6, tier: 2, type: sat, version: v3.0-AVHRR-PM} clt: + <<: *var_mon cltStderr: + <<: *var_mon clwvi: + <<: *var_mon lwp: + <<: *var_mon rlut: + <<: *var_mon rlutcs: + <<: *var_mon rsut: + <<: *var_mon rsutcs: + <<: *var_mon rsdt: + <<: *var_mon rlus: + <<: *var_mon rsus: + <<: *var_mon rsuscs: - additional_datasets: - - {dataset: ESACCI-CLOUD, project: OBS, mip: Amon, tier: 2, - type: sat, version: AVHRR-AMPM-fv3.0, start_year: 1982, end_year: 2016} + <<: *var_mon + clt_day: &var_day + short_name: clt + mip: day + start_year: 2003 + end_year: 2007 + additional_datasets: + - {dataset: ESACCI-CLOUD, project: OBS6, tier: 2, type: sat, version: v3.0-AVHRR-AMPM} + clwvi_day: + <<: *var_day + short_name: clwvi + mip: CFday + version: v3.0-AVHRR-AMPM-daylight + cod_day: + <<: *var_day + short_name: cod + version: v3.0-AVHRR-AMPM-daylight scripts: null diff --git a/esmvaltool/references/esacci_cloud.bibtex b/esmvaltool/references/esacci_cloud.bibtex new file mode 100644 index 0000000000..0a2ec5d128 --- /dev/null +++ b/esmvaltool/references/esacci_cloud.bibtex @@ -0,0 +1,10 @@ +@article{esacci-cloud, + doi = {10.5194/essd-12-41-202}, + url = {https://doi.org/10.5194/essd-12-41-2020}, + year = 2020, + volume = {12}, + pages = {41--60} + 
author = {Stengel, M. and Stapelberg, S. and Sus, O. and Finkensieper, S. and Würzler, B. and Philipp, D. and Hollmann, R. and Poulsen, C. and Christensen, M. and McGarragh, G.},
+  title = {Cloud_cci Advanced Very High Resolution Radiometer post meridiem (AVHRR-PM) dataset version 3: 35-year climatology of global cloud and radiation properties},
+  journal = {Earth Syst. Sci. Data}
+}
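
The downloader above selects the active AVHRR-AM/PM source directories with a long if/elif chain over calendar dates. The sketch below restates that mapping as a data table so the platform switch-overs are easier to check at a glance; the boundary dates and directory names are copied from the chain, while the helper name platforms_for and the table layout are inventions for this note, not part of the patch.

    from datetime import datetime

    # (start of validity, AM directory, PM directory); dates and paths copied
    # from the if/elif chain in the downloader.  An empty string means that no
    # satellite contributes for that orbit during the period.
    PLATFORM_PERIODS = [
        (datetime(1982, 1, 1), "", "AVHRR-PM/AVHRR_NOAA-7/"),
        (datetime(1985, 2, 1), "", "AVHRR-PM/AVHRR_NOAA-9/"),
        (datetime(1988, 11, 1), "", "AVHRR-PM/AVHRR_NOAA-11/"),
        (datetime(1991, 9, 1), "AVHRR-AM/AVHRR_NOAA-12/", "AVHRR-PM/AVHRR_NOAA-11/"),
        (datetime(1994, 9, 1), "AVHRR-AM/AVHRR_NOAA-12/", ""),
        (datetime(1995, 2, 1), "AVHRR-AM/AVHRR_NOAA-12/", "AVHRR-PM/AVHRR_NOAA-14/"),
        (datetime(1999, 1, 1), "AVHRR-AM/AVHRR_NOAA-15/", "AVHRR-PM/AVHRR_NOAA-14/"),
        (datetime(2001, 4, 1), "AVHRR-AM/AVHRR_NOAA-15/", "AVHRR-PM/AVHRR_NOAA-16/"),
        (datetime(2002, 11, 1), "AVHRR-AM/AVHRR_NOAA-17/", "AVHRR-PM/AVHRR_NOAA-16/"),
        (datetime(2005, 9, 1), "AVHRR-AM/AVHRR_NOAA-17/", "AVHRR-PM/AVHRR_NOAA-18/"),
        (datetime(2007, 7, 1), "AVHRR-AM/AVHRR_METOPA/", "AVHRR-PM/AVHRR_NOAA-18/"),
        (datetime(2009, 6, 1), "AVHRR-AM/AVHRR_METOPA/", "AVHRR-PM/AVHRR_NOAA-19/"),
    ]
    END_OF_RECORD = datetime(2017, 1, 1)


    def platforms_for(loop_date):
        """Return (sat_am, sat_pm) directory fragments for a given month."""
        if not PLATFORM_PERIODS[0][0] <= loop_date < END_OF_RECORD:
            raise ValueError(f"Data for this date {loop_date:%Y%m} is not available")
        sat_am = sat_pm = ""
        for start, am_dir, pm_dir in PLATFORM_PERIODS:
            if loop_date >= start:
                sat_am, sat_pm = am_dir, pm_dir
        return sat_am, sat_pm


    # April 1995 falls in the NOAA-12 (morning) / NOAA-14 (afternoon) overlap.
    print(platforms_for(datetime(1995, 4, 1)))
    # -> ('AVHRR-AM/AVHRR_NOAA-12/', 'AVHRR-PM/AVHRR_NOAA-14/')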
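
For each month, the downloader then assembles one recursive wget call per active platform, restricted by an --accept pattern so that only that month's NetCDF files are fetched. The following self-contained sketch shows the string assembly for one example month (January 2006, afternoon orbit), reusing base_path_l3c and wget_options exactly as defined in download_dataset() above; it only prints the pieces and does not perform any download.

    # Values taken verbatim from download_dataset() above.
    base_path_l3c = (
        "https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/"
        "CLD_PRODUCTS/v3.0/L3C/"
    )
    wget_options = [
        "-r",
        "-e robots=off",          # Ignore robots.txt
        "--no-parent",            # Don't ascend to the parent directory
        '--reject="index.html"',  # Reject any HTML files
    ]

    # Example month: January 2006, afternoon orbit (NOAA-18 in that period).
    year, month = 2006, 1
    date = f"{year}{month:02}"
    sat_pm = "AVHRR-PM/AVHRR_NOAA-18/"

    folder_l3c = base_path_l3c + sat_pm + f"{year}/"
    wget_options_l3c = [*wget_options, f"--accept={date}*.nc"]

    print(folder_l3c)
    # https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/
    #     CLD_PRODUCTS/v3.0/L3C/AVHRR-PM/AVHRR_NOAA-18/2006/
    print(wget_options_l3c[-1])
    # --accept=200601*.nc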
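
On the formatter side, _process_daily_file() builds a second, daylight-only cube for each L3U variable by masking every pixel whose illumination flag (illum_asc/illum_desc) is greater than 1; this note interprets flag value 1 as daylight, which is an assumption about the flag convention. A minimal sketch of that masking step with made-up values:

    import numpy as np
    from dask import array as da

    # Hypothetical 2 x 3 scene: retrieved cloud optical depth and the
    # corresponding illumination flag (values invented for this example).
    cot = da.from_array(np.array([[0.2, 0.5, 0.9],
                                  [0.1, 0.7, 0.3]]))
    illum = da.from_array(np.array([[1, 2, 1],
                                    [3, 1, 1]]))

    # Same operation as in _process_daily_file(): mask everything that is not
    # flagged as daylight before computing the "-daylight" daily statistics.
    cot_day = da.ma.masked_where(illum > 1, cot)
    print(cot_day.compute())
    # Pixels with illum > 1 come out masked; only daylight retrievals remain.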
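
Finally, a note on clwvi: the L3C files carry no total condensed water path, which is why the clwvi entries in ESACCI-CLOUD.yml point at iwp_allsky as the raw variable. The formatter (like the retired NCL script) then adds lwp_allsky on top, so the CMORized clwvi is the sum of ice and liquid water path. A toy example with made-up numbers:

    import numpy as np

    # Made-up grid-point values in the raw units of the L3C files (g/m2).
    iwp_allsky = np.array([30.0, 55.0])   # ice water path
    lwp_allsky = np.array([80.0, 120.0])  # liquid water path

    clwvi = iwp_allsky + lwp_allsky       # total condensed water path
    print(clwvi)
    # [110. 175.]  -- later converted to kg m-2 via convert_units()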