Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ main
`Utils.isequispaced` is now more efficient: it fails fast and does not allocate
as much. More redundant allocations due to `Utils.isequispaced` were fixed.

### Features

- Reduced allocations in regridding. New method `read!`.
The existing `read` now returns a copy of the data when it is served from the cache.
PR [#119](https://github.com/CliMA/ClimaUtilities.jl/pull/119)

v0.1.15
-------

Expand Down
6 changes: 4 additions & 2 deletions docs/src/filereaders.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ The only file reader currently implemented is the `NCFileReader`, used to read
NetCDF files. Each `NCFileReader` is associated to one particular file and
variable (but multiple `NCFileReader`s can share the same file).

Once created, `NCFileReader` is accessed with the `read(file_reader, date)`
Once created, `NCFileReader` is accessed with the `read!(dest, file_reader, date)`
function, which fills the preallocated array `dest` with the data associated with the given `date` (if available).
The `date` can be omitted if the data is static.
The `date` can be omitted if the data is static. The data is stored in a
preallocated array so it can be accessed multiple times without reallocating.

`NCFileReader`s implement two additional features: (1) optional preprocessing,
and (2) cache reads. `NCFileReader`s can be created with a `preprocessing_func`
Expand Down Expand Up @@ -78,6 +79,7 @@ close(v_var)
```@docs
ClimaUtilities.FileReaders.NCFileReader
ClimaUtilities.FileReaders.read
ClimaUtilities.FileReaders.read!
ClimaUtilities.FileReaders.available_dates
ClimaUtilities.FileReaders.close_all_ncfiles
Base.close
Expand Down
26 changes: 24 additions & 2 deletions ext/DataHandlingExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import ClimaCore: ClimaComms

import ClimaUtilities.DataStructures
import ClimaUtilities.Regridders
import ClimaUtilities.FileReaders: AbstractFileReader, NCFileReader, read
import ClimaUtilities.FileReaders: AbstractFileReader, NCFileReader, read, read!
import ClimaUtilities.Regridders: AbstractRegridder, regrid

import ClimaUtilities.Utils: isequispaced, period_to_seconds_float
Expand Down Expand Up @@ -57,6 +57,7 @@ struct DataHandler{
CACHE <: DataStructures.LRUCache{Dates.DateTime, ClimaCore.Fields.Field},
FUNC <: Function,
NAMES <: AbstractArray{<:AbstractString},
PR <: AbstractDict{<:AbstractString, <:AbstractArray},
}
"""Dictionary of variable names and objects responsible for getting the input data from disk to memory"""
file_readers::FR
Expand Down Expand Up @@ -87,6 +88,9 @@ struct DataHandler{

"""Names of the datasets in the NetCDF that have to be read and processed"""
varnames::NAMES

"""Preallocated memory for storing read dataset"""
preallocated_read_data::PR
end

"""
Expand All @@ -105,6 +109,8 @@ In the latter case, the entries of `file_paths` and `varnames` are expected to m

The DataHandler maintains an LRU cache of Fields that were previously computed.

Creating this object results in the file being accessed (to preallocate some memory).

Positional arguments
=====================

Expand Down Expand Up @@ -264,6 +270,13 @@ function DataHandling.DataHandler(
available_times = period_to_seconds_float.(available_dates .- start_date)
dimensions = first(values(file_readers)).dimensions

# Preallocate space for each variable to be read
one_date = isempty(available_dates) ? () : (first(available_dates),)
preallocated_read_data = Dict(
varname => read(file_readers[varname], one_date...) for
varname in varnames
)

return DataHandler(
file_readers,
regridder,
Expand All @@ -275,6 +288,7 @@ function DataHandling.DataHandler(
_cached_regridded_fields,
compose_function,
varnames,
preallocated_read_data,
)
end

Expand Down Expand Up @@ -460,11 +474,19 @@ function DataHandling.regridded_snapshot(
regrid_args = (date,)
end
elseif regridder_type == :InterpolationsRegridder

# Read input data from each file, maintaining order, and apply composing function
# In the case of a single input variable, it will remain unchanged
for varname in varnames
read!(
data_handler.preallocated_read_data[varname],
data_handler.file_readers[varname],
date,
)
end
data_composed = compose_function(
(
read(data_handler.file_readers[varname], date) for
data_handler.preallocated_read_data[varname] for
varname in varnames
)...,
)
Expand Down
50 changes: 40 additions & 10 deletions ext/NCFileReaderExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,14 @@ end
Read and preprocess the data at the given `date`.
"""
function FileReaders.read(file_reader::NCFileReader, date::Dates.DateTime)
# For cache hits, return a copy to give away ownership of the data (if we were to just
# return _cached_reads[date], modifying the return value would modify the private state
# of the file reader)

if haskey(file_reader._cached_reads, date)
return copy(file_reader._cached_reads[date])
end

# DateTime(0) is the sentinel value for static datasets
if date == Dates.DateTime(0)
return get!(file_reader._cached_reads, date) do
Expand All @@ -188,16 +196,14 @@ function FileReaders.read(file_reader::NCFileReader, date::Dates.DateTime)
error("Problem with date $date in $(file_reader.file_path)")
index = index[]

return get!(file_reader._cached_reads, date) do
var = file_reader.dataset[file_reader.varname]
slicer = [
i == file_reader.time_index ? index : Colon() for
i in 1:length(NCDatasets.dimnames(var))
]
return file_reader.preprocess_func.(
file_reader.dataset[file_reader.varname][slicer...]
)
end
var = file_reader.dataset[file_reader.varname]
slicer = [
i == file_reader.time_index ? index : Colon() for
i in 1:length(NCDatasets.dimnames(var))
]
return file_reader.preprocess_func.(
file_reader.dataset[file_reader.varname][slicer...]
)
end

"""
Expand Down Expand Up @@ -226,4 +232,28 @@ function FileReaders.read(file_reader::NCFileReader)
end
end

"""
    read!(dest, file_reader::NCFileReader)

Read and preprocess data (for static datasets), saving the output to `dest`.

`dest` must be broadcast-compatible with the data being read.
"""
function FileReaders.read!(dest, file_reader::NCFileReader)
    # Delegate to the non-mutating `read`, then copy the result into the
    # caller-provided buffer in place.
    data = FileReaders.read(file_reader)
    dest .= data
    return nothing
end

"""
    read!(dest, file_reader::NCFileReader, date::Dates.DateTime)

Read and preprocess the data at the given `date`, saving the output to `dest`.

`dest` must be broadcast-compatible with the data being read.
"""
function FileReaders.read!(
    dest,
    file_reader::NCFileReader,
    date::Dates.DateTime,
)
    # On a cache hit, `read` allocates a defensive copy (to protect the cache
    # from mutation through the returned array). Since we immediately broadcast
    # into `dest` and discard that copy, read the cached array directly instead
    # and skip the intermediate allocation.
    if haskey(file_reader._cached_reads, date)
        dest .= file_reader._cached_reads[date]
    else
        dest .= FileReaders.read(file_reader, date)
    end
    return nothing
end

end
2 changes: 2 additions & 0 deletions src/FileReaders.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ function NCFileReader end

# Function stubs: concrete methods for these are provided by the
# NCFileReaderExt extension (loaded when NCDatasets is available).
function read end

# In-place variant of `read`: fills a caller-provided, preallocated array
# instead of returning a new one.
function read! end

function available_dates end

function close_all_ncfiles end
Expand Down
12 changes: 12 additions & 0 deletions test/file_readers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ using NCDatasets
@test FileReaders.read(ncreader_u, DateTime(2021, 01, 01, 01)) ==
nc["u10n"][:, :, 2]

# Test read!
dest = copy(nc["u10n"][:, :, 2])
fill!(dest, 0)
FileReaders.read!(dest, ncreader_u, DateTime(2021, 01, 01, 01))
@test dest == nc["u10n"][:, :, 2]

# Test that we need to close all the variables to close the file
open_ncfiles =
Base.get_extension(
Expand Down Expand Up @@ -73,6 +79,12 @@ end

@test FileReaders.read(ncreader) == nc["u10n"][:, :]

# Test read!
dest = copy(nc["u10n"][:, :])
fill!(dest, 0)
FileReaders.read!(dest, ncreader)
@test dest == nc["u10n"][:, :]

@test isempty(FileReaders.available_dates(ncreader))

FileReaders.close_all_ncfiles()
Expand Down