Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b17cd31
Issue #38
IAlibay Jan 18, 2025
c6c3faf
fix dt reading
IAlibay Jan 19, 2025
2e2b27d
Add pyyaml as a dependency
IAlibay Jan 19, 2025
e4eaf79
try to fix packaging
IAlibay Jan 19, 2025
dc93caf
remove duplicate entry
IAlibay Jan 19, 2025
ef3d66c
Add tests for serialization utils
IAlibay Jan 19, 2025
5678982
Add tests for determine_dt
IAlibay Jan 19, 2025
963b583
Add a bunch more tests & checks, also fix cell_angles variable
IAlibay Jan 19, 2025
4133aeb
Add a test to cover the "no unit conversion" case
IAlibay Jan 19, 2025
a4cd953
Add ability to read negative state/replica id
IAlibay Jan 19, 2025
d200fc9
Add relevant code to deal with skipped frames
IAlibay Jan 20, 2025
6f8f715
My bad, didn't see the variable
IAlibay Jan 20, 2025
b8131c7
fix typo
IAlibay Jan 20, 2025
b20c047
Update src/openfe_analysis/reader.py
IAlibay Jan 23, 2025
9fc09e7
Address review comments
IAlibay Jan 23, 2025
5e75874
Apply suggestions from code review
IAlibay Jan 23, 2025
00ddea3
Merge branch 'fix_nsteps' into masked_frames
IAlibay Jan 23, 2025
d7f4ae9
Add a few things from the review
IAlibay Jan 23, 2025
e5870e3
Update src/openfe_analysis/utils/multistate.py
hannahbaumann Jan 23, 2025
331b47e
Switch to new results
hannahbaumann Jan 23, 2025
1329a58
Update test import
hannahbaumann Jan 23, 2025
19bc7ba
Testing out something
hannahbaumann Jan 23, 2025
9e38285
Add check for the presence of PositionInterval attr
hannahbaumann Jan 23, 2025
a78ac5b
Add test skipped velocities
hannahbaumann Jan 23, 2025
d2eface
Check if marking tests as flaky solves the problems
hannahbaumann Jan 23, 2025
e4964a6
Small fixes for tests
hannahbaumann Jan 23, 2025
1b9cd3c
Add pytest-rerunfailure
hannahbaumann Jan 23, 2025
0f5cfef
Remove unnecessary lines
hannahbaumann Jan 27, 2025
cd405e4
Add pytest.warn checks
hannahbaumann Jan 28, 2025
2d628d0
Add test for no positions at frame 1
hannahbaumann Jan 28, 2025
c709284
Small update test
hannahbaumann Jan 28, 2025
faa5546
Add test for time
hannahbaumann Jan 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ dependencies:
- openff-units
- pip
- tqdm
- pyyaml
# for testing
- coverage
- pooch
- pytest
- pytest-cov
- pytest-xdist
- pytest-rerunfailures
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ dependencies = [
'numpy',
'netCDF4',
'openff-units',
'pyyaml',
]
description="Analysis of free energy calculations."
description="Trajectory analysis of free energy calculations."
readme="README.md"
requires-python = ">=3.10"
classifiers = [
Expand All @@ -29,11 +30,11 @@ classifiers = [

[tool.setuptools]
zip-safe = false
include-package-data = true
license-files = ["LICENSE"]

[tool.setuptools.packages.find]
where = ["src"]
include = ["openfe_analysis"]

[project.optional-dependencies]
test = [
Expand Down
1 change: 0 additions & 1 deletion src/openfe_analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from ._version import __version__

from . import handle_trajectories
from .reader import FEReader
from .transformations import (
NoJump,
Expand Down
156 changes: 112 additions & 44 deletions src/openfe_analysis/reader.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,106 @@
from MDAnalysis.coordinates.base import ReaderBase, Timestep
import netCDF4 as nc
from openff.units import unit
import numpy as np
import yaml
from typing import Optional


from . import handle_trajectories
from openfe_analysis.utils import multistate, serialization
from openfe_analysis.utils.multistate import _determine_position_indices


def _determine_dt(ds) -> float:
# first grab integrator timestep
mcmc_move_data = ds.groups['mcmc_moves']['move0'][0].split('\n')
in_timestep = False
for line in mcmc_move_data:
if line.startswith('timestep'):
in_timestep = True
if in_timestep and line.strip().startswith('value'):
timestep = float(line.split()[-1]) / 1000. # convert to ps
break
else:
raise ValueError("Didn't find timestep")
# next get the save interval
option_data = ds.variables['options'][0].split('\n')
for line in option_data:
if line.startswith('online_analysis_interval'):
nsteps = float(line.split()[-1])
break
else:
raise ValueError("Didn't find online_analysis_interval")
def _determine_iteration_dt(dataset) -> float:
"""
Find out the timestep between each frame in the trajectory.

Parameters
----------
dataset : nc.Dataset
Dataset holding the MultiStateReporter generated NetCDF file.

Returns
-------
float
The timestep in units of picoseconds.

Raises
------
KeyError
If either `timestep` or `n_steps` cannot be found in the
zeroth MCMC move.

Notes
-----
This assumes an MCMC move which serializes in a manner similar
to `openmmtools.mcmc.LangevinDynamicsMove`, i.e. it must have
both a `timestep` and `n_steps` defined.
"""
# Deserialize the MCMC move information for the 0th entry.
mcmc_move_data = yaml.load(
dataset.groups['mcmc_moves']['move0'][0],
Loader=serialization.UnitedYamlLoader,
)

try:
dt = mcmc_move_data['n_steps'] * mcmc_move_data['timestep']
except KeyError:
msg = "Either `n_steps` or `timestep` are missing from the MCMC move"
raise KeyError(msg)

return timestep * nsteps
return dt.to('picosecond').m


class FEReader(ReaderBase):
"""A MDAnalysis Reader for nc files created by openfe RFE Protocol
"""A MDAnalysis Reader for NetCDF files created by
`openmmtools.multistate.MultiStateReporter`

Looks along a multistate .nc file along one of two axes:
- constant state/lambda (varying replica)
- constant replica (varying lambda)
Looks along a multistate NetCDF file along one of two axes:
- constant state/lambda (varying replica)
- constant replica (varying lambda)
"""
_state_id: Optional[int]
_replica_id: Optional[int]
_frame_index: int
_dataset: nc.Dataset
_dataset_owner: bool

format = 'openfe RFE'
format = 'MultiStateReporter'

def __init__(self, filename, convert_units=True, **kwargs):
units = {
'time': 'ps',
'length': 'nanometer'
}

def __init__(
self, filename, convert_units=True,
state_id=None, replica_id=None, **kwargs
):
"""
Parameters
----------
filename : pathlike or nc.Dataset
path to the .nc file
convert_units : bool
convert positions to A
convert positions to Angstrom
state_id : Optional[int]
The Hamiltonian state index to extract. Must be defined if
``replica_id`` is not defined. May be negative (see notes below).
replica_id : Optional[int]
The replica index to extract. Must be defined if ``state_id``
is not defined. May be negative (see notes below).

Notes
-----
A negative index may be passed to either ``state_id`` or
``replica_id``. This will be interpreted as indexing in reverse
starting from the last state/replica. For example, passing a
value of -2 for ``replica_id`` will select the before last replica.
"""
self._state_id = kwargs.pop('state_id', None)
self._replica_id = kwargs.pop('replica_id', None)
if not ((self._state_id is None) ^ (self._replica_id is None)):
if not ((state_id is None) ^ (replica_id is None)):
raise ValueError("Specify one and only one of state or replica, "
f"got state id={self._state_id} "
f"replica_id={self._replica_id}")
f"got state id={state_id} "
f"replica_id={replica_id}")

super().__init__(filename, convert_units, **kwargs)

Expand All @@ -70,9 +110,23 @@
else:
self._dataset = nc.Dataset(filename)
self._dataset_owner = True

# Handle the negative ID case
if state_id is not None and state_id < 0:
state_id = range(self._dataset.dimensions['state'].size)[state_id]

if replica_id is not None and replica_id < 0:
replica_id = range(self._dataset.dimensions['replica'].size)[replica_id]

self._state_id = state_id
self._replica_id = replica_id

self._n_atoms = self._dataset.dimensions['atom'].size
self.ts = Timestep(self._n_atoms)
self._dt = _determine_dt(self._dataset)
self._frames = _determine_position_indices(self._dataset)
# The MDAnalysis trajectory "dt" is the iteration dt
# multiplied by the number of iterations between frames.
self._dt = _determine_iteration_dt(self._dataset) * np.diff(self._frames)[0]
self._read_frame(0)

@staticmethod
Expand All @@ -86,13 +140,12 @@

@property
def n_frames(self) -> int:
return self._dataset.dimensions['iteration'].size
return len(self._frames)

@staticmethod
def parse_n_atoms(filename, **kwargs) -> int:
with nc.Dataset(filename) as ds:
n_atoms = ds.dimensions['atom'].size

return n_atoms

def _read_next_timestep(self, ts=None) -> Timestep:
Expand All @@ -104,24 +157,39 @@
self._frame_index = frame

if self._state_id is not None:
rep = handle_trajectories._state_to_replica(
rep = multistate._state_to_replica(
self._dataset,
self._state_id,
self._frame_index
self._frames[self._frame_index]
)
else:
rep = self._replica_id

pos = handle_trajectories._replica_positions_at_frame(
pos = multistate._replica_positions_at_frame(
self._dataset,
rep,
self._frame_index)
dim = handle_trajectories._get_unitcell(
self._frames[self._frame_index]
)
dim = multistate._get_unitcell(
self._dataset,
rep,
self._frame_index)
self._frames[self._frame_index]
)

if pos is None:
errmsg = (

Check warning on line 180 in src/openfe_analysis/reader.py

View check run for this annotation

Codecov / codecov/patch

src/openfe_analysis/reader.py#L180

Added line #L180 was not covered by tests
"NetCDF dataset frame without positions was accessed "
"this likely indicates that the reader failed to work out "
"the write frequency and there is a deeper issue with how "
"this file was written."
)
raise RuntimeError(errmsg)

Check warning on line 186 in src/openfe_analysis/reader.py

View check run for this annotation

Codecov / codecov/patch

src/openfe_analysis/reader.py#L186

Added line #L186 was not covered by tests

self.ts.positions = (pos.to(unit.angstrom)).m
# Convert to base MDAnalysis distance units (Angstrom) if requested
if self.convert_units:
self.ts.positions = (pos.to(unit.angstrom)).m
else:
self.ts.positions = pos.m
self.ts.dimensions = dim
self.ts.frame = self._frame_index
self.ts.time = self._frame_index * self._dt
Expand Down
58 changes: 58 additions & 0 deletions src/openfe_analysis/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from importlib import resources
import pooch
import pytest


RFE_OUTPUT = pooch.create(
path=pooch.os_cache("openfe_analysis"),
base_url="doi:10.6084/m9.figshare.24101655",
registry={
"checkpoint.nc": "5af398cb14340fddf7492114998b244424b6c3f4514b2e07e4bd411484c08464",
"db.json": "b671f9eb4daf9853f3e1645f9fd7c18150fd2a9bf17c18f23c5cf0c9fd5ca5b3",
"hybrid_system.pdb": "07203679cb14b840b36e4320484df2360f45e323faadb02d6eacac244fddd517",
"simulation.nc": "92361a0864d4359a75399470135f56642b72c605069a4c33dbc4be6f91f28b31",
"simulation_real_time_analysis.yaml": "65706002f371fafba96037f29b054fd7e050e442915205df88567f48f5e5e1cf",
}
)


RFE_OUTPUT_skipped_frames = pooch.create(
path=pooch.os_cache("openfe_analysis_skipped"),
base_url="doi:10.6084/m9.figshare.28263203",
registry={
"hybrid_system.pdb": "77c7914b78724e568f38d5a308d36923f5837c03a1d094e26320b20aeec65fee",
"simulation.nc": "6749e2c895f16b7e4eba196261c34756a0a062741d36cc74925676b91a36d0cd",
}
)


@pytest.fixture(scope='session')
def simulation_nc():
return RFE_OUTPUT.fetch("simulation.nc")


@pytest.fixture(scope='session')
def simulation_skipped_nc():
return RFE_OUTPUT_skipped_frames.fetch("simulation.nc")


@pytest.fixture(scope='session')
def hybrid_system_pdb():
return RFE_OUTPUT.fetch("hybrid_system.pdb")


@pytest.fixture(scope='session')
def hybrid_system_skipped_pdb():
return RFE_OUTPUT_skipped_frames.fetch("hybrid_system.pdb")


@pytest.fixture(scope='session')
def mcmc_serialized():
return (
'_serialized__class_name: LangevinDynamicsMove\n'
'_serialized__module_name: openmmtools.mcmc\n'
'collision_rate: !Quantity\n unit: /picosecond\n value: 1\n'
'constraint_tolerance: 1.0e-06\nn_restart_attempts: 20\n'
'n_steps: 625\nreassign_velocities: false\n'
'timestep: !Quantity\n unit: femtosecond\n value: 4\n'
)
Empty file.
Loading
Loading