4 changes: 2 additions & 2 deletions CHANGES
@@ -71,7 +71,7 @@ Release 1.4 (14 June 2013)
* Handle missing values from grib messages
https://github.com/SciTools/iris/pull/520

* PP export rule to calculate forecast period
https://github.com/SciTools/iris/pull/514

* Preserve masked arrays during aggregation
@@ -152,7 +152,7 @@ Features added
other than [-180, 180].
* Support for customised CF profiles on export to netCDF.
* The documentation now includes guidance on how to cite Iris.
* The ability to calculate the exponential of a Cube, via
`iris.analysis.maths.exp()`.
* Experimental support for concatenating Cubes along existing dimensions
via `iris.experimental.concatenate.concatenate()`.
49 changes: 46 additions & 3 deletions benchmarks/benchmarks/__init__.py
@@ -6,6 +6,9 @@

from os import environ
import resource
import tracemalloc

import numpy as np

ARTIFICIAL_DIM_SIZE = int(10e3) # For all artificial cubes, coords etc.

@@ -66,24 +69,44 @@ class TrackAddedMemoryAllocation:

"""

RESULT_MINIMUM_MB = 5.0
_DEFAULT_RESULT_MINIMUM_MB = 5.0
_DEFAULT_RESULT_ROUND_DP = 1

def __init__(self, use_tracemalloc=False, result_min_mb=None, result_round_dp=None):
self._use_tracemalloc = use_tracemalloc
if result_min_mb is None:
result_min_mb = self._DEFAULT_RESULT_MINIMUM_MB
self.RESULT_MINIMUM_MB = result_min_mb
if result_round_dp is None:
result_round_dp = self._DEFAULT_RESULT_ROUND_DP
self.RESULT_ROUND_DP = result_round_dp

@staticmethod
def process_resident_memory_mb():
return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0

def __enter__(self):
self.mb_before = self.process_resident_memory_mb()
if self._use_tracemalloc:
self.mb_before = 0
tracemalloc.start()
else:
self.mb_before = self.process_resident_memory_mb()
return self

def __exit__(self, *_):
self.mb_after = self.process_resident_memory_mb()
if self._use_tracemalloc:
_, peak_mem = tracemalloc.get_traced_memory()
tracemalloc.stop()
self.mb_after = peak_mem * 1.0 / 1024**2
else:
self.mb_after = self.process_resident_memory_mb()

def addedmem_mb(self):
"""Return measured memory growth, in Mb."""
result = self.mb_after - self.mb_before
# Small results are too vulnerable to noise being interpreted as signal.
result = max(self.RESULT_MINIMUM_MB, result)
result = np.round(result, self.RESULT_ROUND_DP)
return result

@staticmethod
@@ -124,3 +147,23 @@ def on_demand_benchmark(benchmark_object):
"""
if "ON_DEMAND_BENCHMARKS" in environ:
return benchmark_object


def memtrace_benchmark(use_tracemalloc=False, result_min_mb=None):
    # Call which returns a decorator == 'decorator with args'.
    # N.B. embeds the call arguments in the env of the decorator returned.
    from functools import wraps

    def decorator(decorated_func):
        assert decorated_func.__name__[:6] == "track_"

        @wraps(decorated_func)
        def wrapper(*args, **kwargs):
            # The factory keyword is 'use_tracemalloc': the leading underscore
            # belongs only to the attribute stored on the tracker object.
            with TrackAddedMemoryAllocation(
                use_tracemalloc=use_tracemalloc, result_min_mb=result_min_mb
            ) as tracer:
                decorated_func(*args, **kwargs)
            # An asv 'track_' benchmark records the wrapper's return value,
            # so return the measured memory growth, not the function result.
            return tracer.addedmem_mb()

        return wrapper

    return decorator
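
For orientation, a minimal usage sketch (not part of the diff; the suite and workload names are invented). The factory is *called* with its options to produce the actual decorator, and because the runner records a 'track_' method's return value, the wrapped method reports the measured memory rather than its own result::

    class HypotheticalSuite:
        @memtrace_benchmark(use_tracemalloc=True, result_min_mb=0.5)
        def track_workload_addedmem(self):
            # Stand-in workload: transiently allocate a few tens of MiB.
            data = [0.0] * 10_000_000
            return data  # ignored -- the wrapper returns addedmem_mb()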
113 changes: 113 additions & 0 deletions benchmarks/benchmarks/memtrace_evaluation/__init__.py
@@ -0,0 +1,113 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Benchmarks to evaluate tracemalloc/rss methods of memory measurement."""

from .. import TrackAddedMemoryAllocation
from .memory_exercising_task import SampleParallelTask


class MemcheckCommon:
    # Basic controls over the test calculation
    default_params = {
        "measure": "tracemalloc",  # alternate: "rss"
        "runtype": "threads",  # alternate: "processes"
        "ysize": 10000,
        "nx": 2000,
        "nblocks": 6,
        "nworkers": 4,
    }

    def _setup(self, **kwargs):
        params = self.default_params.copy()
        params.update(kwargs)
        measure = params["measure"]
        runtype = params["runtype"]
        ysize = params["ysize"]
        nx = params["nx"]
        nblocks = params["nblocks"]
        nworkers = params["nworkers"]

        # 'ysize' is the total y extent: each of the 'nblocks' subtasks
        # averages a (ysize // nblocks, nx) block.
        nyfull = ysize
        use_processes = {"threads": False, "processes": True}[runtype]
        self.task = SampleParallelTask(
            n_blocks=nblocks,
            outerdim=nyfull // nblocks,
            innerdim=nx,
            n_workers=nworkers,
            use_process_workers=use_processes,
        )
        self.use_tracemalloc = {"tracemalloc": True, "rss": False}[measure]

    def run_time_calc(self):
        # This usage is a bit awkward, as we don't really care about the runtype.
        self.task.perform()

    def run_addedmem_calc(self):
        with TrackAddedMemoryAllocation(
            use_tracemalloc=self.use_tracemalloc,
            result_min_mb=0.0,
        ) as tracer:
            self.task.perform()
        return tracer.addedmem_mb()


def memory_units_mib(func):
    func.unit = "MiB"
    return func


class MemcheckRunstyles(MemcheckCommon):
    # only some are parametrised, or it's just too complicated!
    param_names = [
        "measure",
        "runtype",
        "ysize",
    ]
    params = [
        # measure
        ["tracemalloc", "rss"],
        # runtype
        ["threads", "processes"],
        # ysize
        [10000, 40000],
    ]

    def setup(self, measure, runtype, ysize):
        self._setup(measure=measure, runtype=runtype, ysize=ysize)

    def time_calc(self, measure, runtype, ysize):
        self.run_time_calc()

    @memory_units_mib
    def track_addmem_calc(self, measure, runtype, ysize):
        return self.run_addedmem_calc()


class MemcheckBlocksAndWorkers(MemcheckCommon):
    # only some are parametrised, or it's just too complicated!
    param_names = [
        "nblocks",
        "nworkers",
    ]
    params = [
        # nblocks
        [1, 4, 9],
        # nworkers
        [1, 4, 9],
    ]

    def setup(self, nblocks, nworkers):
        self.default_params["ysize"] = 20000
        self._setup(
            nblocks=nblocks,
            nworkers=nworkers,
        )

    def time_calc(self, nblocks, nworkers):
        self.run_time_calc()

    @memory_units_mib
    def track_addmem_calc(self, nblocks, nworkers):
        return self.run_addedmem_calc()
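
These classes are normally driven by the benchmark runner (asv), which expands ``params`` into its cross-product and calls ``setup`` before each measurement. A hand-driven equivalent of a single combination, for orientation only, might look like::

    bench = MemcheckRunstyles()
    bench.setup("tracemalloc", "threads", 10000)
    added_mib = bench.track_addmem_calc("tracemalloc", "threads", 10000)
    print(f"added memory: {added_mib} MiB")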
89 changes: 89 additions & 0 deletions benchmarks/benchmarks/memtrace_evaluation/memory_exercising_task.py
@@ -0,0 +1,89 @@
# Provide standard parallel calculations for testing the memory tracing
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

import numpy as np

"""
the basic operation is to for each worker to construct a (NY, NX) numpy
random array, of which it calculates and returns the mean(axis=0)
--> (NX,) result
The results are then collected --> (N_BLOCKS, NX),
and a mean over all calculated --> (NX,)
The final (single-value) result is the *minimum* of that.
"""

# _SHOW_DEBUG = True
_SHOW_DEBUG = False


def debug(msg):
    if _SHOW_DEBUG:
        print(msg)


def subtask_operation(arg):
    i_task, ny, nx = arg
    debug(f"\nRunning #{i_task}({ny}, {nx}) ..")
    data = np.random.uniform(0.0, 1.0, size=(ny, nx))
    sub_result = data.mean(axis=0)
    debug(f"\n.. completed #{i_task}")
    return sub_result


class SampleParallelTask:
    def __init__(
        self,
        n_blocks=5,
        outerdim=1000,
        innerdim=250,
        n_workers=4,
        use_process_workers=False,
    ):
        self.n_blocks = n_blocks
        self.outerdim = outerdim
        self.innerdim = innerdim
        self.n_workers = n_workers
        if use_process_workers:
            self.pool_type = ProcessPoolExecutor
        else:
            self.pool_type = ThreadPoolExecutor
        self._setup_calc()

    def _setup_calc(self):
        self._pool = self.pool_type(self.n_workers)

    def perform(self):
        partial_results = self._pool.map(
            subtask_operation,
            [
                (i_task + 1, self.outerdim, self.innerdim)
                for i_task in range(self.n_blocks)
            ],
        )
        combined = np.stack(list(partial_results))
        result = np.mean(combined, axis=0)
        result = result.min()
        return result


if __name__ == "__main__":
    nb = 12
    nw = 3
    ny, nx = 1000, 200
    dims = (ny, nx)
    use_processes = False
    typ = "process" if use_processes else "thread"
    msg = f"Starting: blocks={nb} workers={nw} size={dims} type={typ}"
    print(msg)
    calc = SampleParallelTask(
        n_blocks=nb,
        outerdim=ny,
        innerdim=nx,
        n_workers=nw,
        use_process_workers=use_processes,
    )
    debug("Created.")
    debug("Run..")
    result = calc.perform()
    debug("\n.. Run DONE.")
    debug(f"result = {result}")
4 changes: 2 additions & 2 deletions docs/gallery_code/general/plot_custom_aggregation.py
@@ -9,7 +9,7 @@
:meth:`~iris.cube.Cube.rolling_window`.

In this case, we have a 240-year sequence of yearly average surface temperature
over North America, and we want to calculate in how many years these exceed a
certain temperature over a spell of 5 years or more.

""" # noqa: D205, D212, D400
@@ -30,7 +30,7 @@
def count_spells(data, threshold, axis, spell_length):
"""Calculate the number of points in a sequence.

Function to calculate the number of points in a sequence where the value
has exceeded a threshold value for at least a certain number of timepoints.

Generalised to operate on multiple time sequences arranged on a specific
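
For orientation (the gallery file is truncated here): a function with this signature is typically wrapped in an :class:`iris.analysis.Aggregator` and applied by collapsing over time. A hedged sketch, with the cube name and threshold assumed::

    from iris.analysis import Aggregator

    SPELL_COUNT = Aggregator(
        "spell_count", count_spells, units_func=lambda units: 1
    )
    # Assuming 'cube' holds the 240-year temperature sequence:
    spell_years = cube.collapsed(
        "time", SPELL_COUNT, threshold=280.0, spell_length=5
    )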
@@ -59,7 +59,7 @@ def make_plot(projection_name, projection_crs):
iplt.contour(overlay_data, 20, linewidths=2.0, colors="darkgreen", linestyles="-")

# Draw a high resolution margin line, inset from the pcolormesh border.
# First calculate rectangle corners, 7% in from each corner of the data.
x_coord, y_coord = main_data.coord(axis="x"), main_data.coord(axis="y")
x_start, x_end = np.min(x_coord.points), np.max(x_coord.points)
y_start, y_end = np.min(y_coord.points), np.max(y_coord.points)
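
The diff cuts off before the corner points are derived; one plausible completion of the "7% in from each corner" step, using the extremes computed above (an assumption, not the gallery's exact code)::

    margin = 0.07
    x_lower = x_start + margin * (x_end - x_start)
    x_upper = x_end - margin * (x_end - x_start)
    y_lower = y_start + margin * (y_end - y_start)
    y_upper = y_end - margin * (y_end - y_start)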
2 changes: 1 addition & 1 deletion docs/gallery_code/meteorology/plot_hovmoller.py
@@ -4,7 +4,7 @@

This example demonstrates the creation of a Hovmoller diagram with fine control
over plot ticks and labels. The data comes from the Met Office OSTIA project
and has been pre-processed to calculate the monthly mean sea surface
temperature.

""" # noqa: D205, D212, D400
@@ -44,7 +44,7 @@ We set up the NumPy arrays we will be filling with the output data::
output_arrays = [np.zeros(pressure.shape[0]) for _ in range(6)]
cape, cin, lcl, lfc, el, tpw = output_arrays

Now we loop over the columns in the data to calculate the soundings::

for y in range(nlim):
    for x in range(nlim):
4 changes: 2 additions & 2 deletions docs/src/further_topics/lenient_maths.rst
@@ -107,7 +107,7 @@ spatial extent, and acts as a ``control``,
STASH m01s00i004
source 'Data from Met Office Unified Model 7.04'

Now let's subtract these cubes in order to calculate a simple ``difference``,

.. doctest:: lenient-example

@@ -134,7 +134,7 @@ Now let's subtract these cubes in order to calculate a simple ``difference``,

Note that, cube maths automatically takes care of broadcasting the
dimensionality of the ``control`` up to that of the ``experiment``, in order to
calculate the ``difference``. This is performed only after ensuring that both
the **dimension coordinates** ``grid_latitude`` and ``grid_longitude`` are first
:ref:`leniently equivalent <lenient equality>`.

6 changes: 3 additions & 3 deletions docs/src/further_topics/which_regridder_to_use.rst
@@ -27,7 +27,7 @@ the following form:
+-----------------+-----------------------------------------------------------+
| **API** | Link to API documentation. |
+-----------------+-----------------------------------------------------------+
| **Method** | The type of algorithm used to calculate the result. |
| | See section on `comparing methods`_. |
+-----------------+-----------------------------------------------------------+
| **Source Grid** | The type of **coordinates** required on the ``src`` cube. |
@@ -324,7 +324,7 @@ the area weighted sum). More precisely, this means that::
to the area weighted average of the result.

This property will be particularly important to consider if you are intending to
calculate global properties such as average temperature or total rainfall over a
given area. It may be less important if you are only interested in local behaviour,
e.g., temperature at particular locations.
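
A rough way to check this property for a given regrid, sketched under assumptions (``src`` and ``target`` are latitude-longitude cubes with contiguous bounds)::

    import iris.analysis
    from iris.analysis.cartography import area_weights

    def area_mean(cube):
        weights = area_weights(cube)
        return cube.collapsed(
            ["latitude", "longitude"], iris.analysis.MEAN, weights=weights
        ).data

    result = src.regrid(target, iris.analysis.AreaWeighted())
    # For a conservative scheme these should agree closely.
    print(area_mean(src), area_mean(result))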

@@ -335,7 +335,7 @@ target. With the keyword argument ``mdtol=0`` this means that there will be an a
around the source mask which will be masked in the result and therefore unaccounted
for in the area weighted average calculation. Conversely, with the keyword argument
``mdtol=1`` there will be an unmasked area in the result that is masked in the source.
This may be particularly important if you are intending to calculate properties
which depend on area, e.g., calculating the total global rainfall based on data in units
of ``kg m-2`` as an area weighted sum. With ``mdtol=0`` this will consistently
underestimate this total and with ``mdtol=1`` will consistently overestimate. This can
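
To make the ``mdtol`` trade-off concrete, a hedged sketch (assuming ``src`` is partially masked and ``target`` is a compatible grid)::

    import iris.analysis

    # mdtol=0: any target cell touching masked source data becomes masked,
    # so the mask grows and area weighted totals are underestimated.
    strict = src.regrid(target, iris.analysis.AreaWeighted(mdtol=0))

    # mdtol=1: a target cell is masked only if fully masked in the source,
    # so the mask shrinks and totals are overestimated.
    loose = src.regrid(target, iris.analysis.AreaWeighted(mdtol=1))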