Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
3d9a55a
First draft for add/subtract contact freqs
dwhswenson Sep 27, 2017
bafa028
Add API naming to docs
dwhswenson Oct 26, 2017
6305be9
Merge branch 'master' of github.com:dwhswenson/contact_map into combi…
dwhswenson Oct 26, 2017
df00d48
bump to 0.2.1.dev0
dwhswenson Nov 3, 2017
896cf1f
Merge pull request #27 from dwhswenson/0.2.1.dev0
dwhswenson Nov 4, 2017
f484b8c
Merge branch 'master' into combining_maps
dwhswenson Nov 5, 2017
2ecd1c1
Stop traj storage; check_compat to ContactObj
dwhswenson Nov 5, 2017
927eb9b
Add test for [add|subtract]_contact_freq
dwhswenson Nov 5, 2017
a4d66fd
Merge pull request #3 from dwhswenson/combining_maps
dwhswenson Nov 5, 2017
0e51675
First steps toward serialization (needs testing!)
dwhswenson Nov 29, 2017
b0bebad
ContactMap serialization done (with tests)
dwhswenson Dec 6, 2017
dc3f830
Add hashes for Py3
dwhswenson Dec 20, 2017
992185f
Fix serialization problems in Py 3.6
dwhswenson Jan 7, 2018
97fa637
[codeclimate] Refactor from_dict deserialization
dwhswenson Jan 7, 2018
77cbf6e
Serialization for ContactFrequency
dwhswenson Jan 8, 2018
7e65d28
[codeclim] Reduce dupes in serialization tests
dwhswenson Jan 8, 2018
60a58eb
Serialization for ContactDifference
dwhswenson Jan 8, 2018
2ea6da0
Serialization docstrings
dwhswenson Jan 8, 2018
f2900c9
Fixes for local docs building
dwhswenson Jan 8, 2018
7e03cb4
Merge pull request #29 from dwhswenson/serialization
dwhswenson Jan 8, 2018
29239a2
Merge branch 'master' into docs
dwhswenson Jan 9, 2018
63a8ee3
Basic dask parallelization setup
dwhswenson Jan 16, 2018
0243b89
Update docs for dask/task-based
dwhswenson Jan 16, 2018
2594298
Add cluster notes in dask_contact_freq.ipynb
dwhswenson Jan 19, 2018
e9b9d16
Skeleton of tests for frequency_task
dwhswenson Jan 20, 2018
f135c31
Most of the tests for frequency_task
dwhswenson Jan 20, 2018
b641b3d
Merge branch 'dask' of github.com:dwhswenson/contact_map into dask
dwhswenson Jan 20, 2018
ae49021
Fix freq_task tests (still have 2 skipped)
dwhswenson Jan 20, 2018
44df300
Add .n_frames in ContactFreq serialization
dwhswenson Jan 20, 2018
d3fbb7d
DaskContactFrequency and tests
dwhswenson Jan 20, 2018
d8cd82b
fixes for tests
dwhswenson Jan 20, 2018
4250e6b
New test for ContactFreq.hash; update dask example
dwhswenson Jan 20, 2018
991451c
Merge pull request #30 from dwhswenson/dask
dwhswenson Jan 20, 2018
60e47bd
Merge branch 'master' into docs
dwhswenson Jan 20, 2018
1855466
update for the docs on dask integration
dwhswenson Jan 21, 2018
912312d
Add colorbar to default plotter
dwhswenson Jan 22, 2018
4aefa69
Add test coverage; refactor for codeclimate
dwhswenson Jan 22, 2018
13e49c2
[codeclim] remove extra text; [cov] diff plotting
dwhswenson Jan 22, 2018
55ba696
Skip matplotlib-based test when no matplotlib!
dwhswenson Jan 22, 2018
4ceef59
[codeclim] Refactor to separate ContactCount
dwhswenson Jan 22, 2018
dadd71e
Merge pull request #31 from dwhswenson/docs
dwhswenson Jan 22, 2018
e5581e4
Bump version to 0.3.0
dwhswenson Jan 22, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dask-worker-space
# netcdf outputs
*nc

Expand Down
2 changes: 1 addition & 1 deletion ci/conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package:
name: contact_map
# add ".dev0" for unreleased versions
version: "0.2.0"
version: "0.3.0"

source:
path: ../../
Expand Down
8 changes: 7 additions & 1 deletion contact_map/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@
__version__ = version.version

from .contact_map import (
ContactMap, ContactFrequency, ContactDifference, ContactCount
ContactMap, ContactFrequency, ContactDifference
)

from .contact_count import ContactCount

from .min_dist import NearestAtoms, MinimumDistanceCounter

from .dask_runner import DaskContactFrequency

from . import plot_utils

# import concurrence
211 changes: 211 additions & 0 deletions contact_map/contact_count.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
import scipy
import numpy as np
import pandas as pd

from .plot_utils import ranged_colorbar

# matplotlib is technically optional, but required for plotting
try:
import matplotlib
import matplotlib.pyplot as plt
except ImportError:
HAS_MATPLOTLIB = False
else:
HAS_MATPLOTLIB = True

def _colorbar(with_colorbar, cmap_f, norm, min_val):
if with_colorbar is False:
return None
elif with_colorbar is True:
cbmin = np.floor(min_val) # [-1.0..0.0] => -1; [0.0..1.0] => 0
cbmax = 1.0
cb = ranged_colorbar(cmap_f, norm, cbmin, cbmax)
# leave open other inputs to be parsed later (like tuples)
return cb

class ContactCount(object):
    """Return object when dealing with contacts (residue or atom).

    This contains all the information about the contacts of a given type.
    This information can be represented several ways. One is as a list of
    contact pairs, each associated with the fraction of time the contact
    occurs. Another is as a matrix, where the rows and columns label the
    pair number, and the value is the fraction of time. This class provides
    several methods to get different representations of this data for
    further analysis.

    In general, instances of this class shouldn't be created by a user using
    ``__init__``; instead, they will be returned by other methods. So users
    will often need to use this object for analysis.

    Parameters
    ----------
    counter : :class:`collections.Counter`
        the counter describing the count of how often the contact occurred;
        key is a frozenset of a pair of numbers (identifying the
        atoms/residues); value is the raw count of the number of times it
        occurred
    object_f : callable
        method to obtain the object associated with the number used in
        ``counter``; typically :meth:`mdtraj.Topology.residue` or
        :meth:`mdtraj.Topology.atom`.
    n_x : int
        number of objects in the x direction (used in plotting)
    n_y : int
        number of objects in the y direction (used in plotting)
    """
    def __init__(self, counter, object_f, n_x, n_y):
        self._counter = counter
        self._object_f = object_f
        self.n_x = n_x
        self.n_y = n_y

    @property
    def counter(self):
        """
        :class:`collections.Counter` :
            keys use index number; count is contact occurrences
        """
        return self._counter

    @property
    def sparse_matrix(self):
        """
        :class:`scipy.sparse.dok.dok_matrix` :
            sparse matrix representation of contacts

            Rows/columns correspond to indices and the values correspond to
            the count
        """
        # A bare ``import scipy`` does not load the ``sparse`` subpackage on
        # every SciPy release, so import it explicitly before using it.
        import scipy.sparse

        mtx = scipy.sparse.dok_matrix((self.n_x, self.n_y))
        for (k, v) in self._counter.items():
            key = list(k)
            # contacts are unordered pairs, so fill both triangles
            mtx[key[0], key[1]] = v
            mtx[key[1], key[0]] = v
        return mtx

    @property
    def df(self):
        """
        :class:`pandas.SparseDataFrame` or :class:`pandas.DataFrame` :
            DataFrame representation of the contact matrix

            Rows/columns correspond to indices and the values correspond to
            the count. On pandas versions that still provide
            ``SparseDataFrame`` that class is returned; otherwise a regular
            ``DataFrame`` with sparse columns is returned.
        """
        mtx = self.sparse_matrix.tocoo()
        index = list(range(self.n_x))
        columns = list(range(self.n_y))

        legacy_frame = getattr(pd, "SparseDataFrame", None)
        if legacy_frame is not None:
            return legacy_frame(mtx, index=index, columns=columns)

        # ``SparseDataFrame`` was removed in pandas 1.0; build the frame
        # through the ``DataFrame.sparse`` accessor instead.
        frame = pd.DataFrame.sparse.from_spmatrix(mtx, index=index,
                                                  columns=columns)
        # ``from_spmatrix`` uses 0 as the fill value; switch to a NaN fill
        # to follow the legacy SparseDataFrame default for pairs without a
        # recorded contact.
        return frame.astype(pd.SparseDtype("float", np.nan))

    def plot(self, cmap='seismic', vmin=-1.0, vmax=1.0, with_colorbar=True):
        """
        Plot contact matrix (requires matplotlib)

        Parameters
        ----------
        cmap : str
            color map name, default 'seismic'
        vmin : float
            minimum value for color map interpolation; default -1.0
        vmax : float
            maximum value for color map interpolation; default 1.0
        with_colorbar : bool
            whether to add a colorbar to the plot; default True

        Returns
        -------
        fig : :class:`matplotlib.Figure`
            matplotlib figure object for this plot
        ax : :class:`matplotlib.Axes`
            matplotlib axes object for this plot

        Raises
        ------
        RuntimeError
            if matplotlib could not be imported
        """
        if not HAS_MATPLOTLIB:  # pragma: no cover
            raise RuntimeError("Error importing matplotlib")
        norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)
        cmap_f = plt.get_cmap(cmap)

        fig, ax = plt.subplots()
        ax.axis([0, self.n_x, 0, self.n_y])
        # background gets the color of "no contact" (value 0.0)
        ax.set_facecolor(cmap_f(norm(0.0)))

        # smallest plotted value (never above 0.0); sets the colorbar range
        min_val = 0.0

        for (pair, value) in self.counter.items():
            if value < min_val:
                min_val = value
            pair_list = list(pair)
            color = cmap_f(norm(value))
            # one unit square per orientation, so the plot is symmetric
            patch_0 = matplotlib.patches.Rectangle(
                pair_list, 1, 1,
                facecolor=color,
                linewidth=0
            )
            patch_1 = matplotlib.patches.Rectangle(
                (pair_list[1], pair_list[0]), 1, 1,
                facecolor=color,
                linewidth=0
            )
            ax.add_patch(patch_0)
            ax.add_patch(patch_1)

        _colorbar(with_colorbar, cmap_f, norm, min_val)

        return (fig, ax)

    def most_common(self, obj=None):
        """
        Most common values (ordered) with object as keys.

        This uses the objects for the contact pair (typically MDTraj
        ``Atom`` or ``Residue`` objects), instead of numeric indices. This
        is more readable and can be easily used for further manipulation.

        Parameters
        ----------
        obj : MDTraj Atom or Residue
            if given, the return value only has entries including this
            object (allowing one to, for example, get the most common
            contacts with a specific residue); only its ``index``
            attribute is used

        Returns
        -------
        list :
            the most common contacts in order. If the list is ``l``, then
            each element ``l[e]`` is a tuple with two parts: ``l[e][0]`` is
            the key, which is a list of the two Atom or Residue objects,
            and ``l[e][1]`` is the count of how often that contact
            occurred.

        See also
        --------
        most_common_idx : same thing, using index numbers as key
        """
        ranked = self.most_common_idx()
        if obj is not None:
            obj_idx = obj.index
            ranked = [common for common in ranked if obj_idx in common[0]]
        return [
            ([self._object_f(idx) for idx in pair], count)
            for (pair, count) in ranked
        ]

    def most_common_idx(self):
        """
        Most common values (ordered) with indices as keys.

        Returns
        -------
        list :
            the most common contacts in order. If the list is ``l``, then
            each element ``l[e]`` consists of two parts: ``l[e][0]`` is
            a pair of integers, representing the indices of the objects
            associated with the contact, and ``l[e][1]`` is the count of how
            often that contact occurred

        See also
        --------
        most_common : same thing, using objects as key
        """
        return self._counter.most_common()
Loading