Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
easyblock = 'PythonBundle'

name = 'ESM-2'
version = '2.0.0'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/facebookresearch/esm'
description = """ESM-2 outperforms all tested single-sequence protein language models
across a range of structure prediction tasks. ESMFold harnesses the ESM-2 language model to generate
accurate structure predictions end to end directly from the sequence of a protein."""

toolchain = {'name': 'foss', 'version': '2023a'}

builddependencies = [
('Java', '11', '', SYSTEM), # needed by ANTLR4 runtime
]

dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
('Python', '3.11.3'),
('PyTorch', '2.1.2', versionsuffix),
]

use_pip = True
sanity_pip_check = True

# omegaconf is required for esmfold (in addition to OpenFold-1.0.1)
exts_list = [
('antlr4-python3-runtime', '4.9.3', {
'modulename': 'antlr4',
'checksums': ['f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b'],
}),
('omegaconf', '2.3.0', {
'checksums': ['d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7'],
}),
('fair-esm', version, {
'modulename': "esm, esm.pretrained",
'checksums': ['4ed34d4598ec75ed6550a4e581d023bf8d4a8375317ecba6269bb68135f80c85'],
}),
]

moduleclass = 'bio'
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Thomas Hoffman, EMBL Heidelberg, structures-it@embl.de, 2023/11
easyblock = 'PythonPackage'

name = 'topaz'
_mainversion = '0.2.5'
_commitdate = '20231120'
_commit = '25cb2cb'
version = '%s.%s' % (_mainversion, _commitdate)
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'http://cb.csail.mit.edu/cb/topaz/'

description = """Particle picking software for single particle cryo-electron microscopy using
convolutional neural networks and positive-unlabeled learning. Includes methods
for micrograph denoising."""

toolchain = {'name': 'foss', 'version': '2023a'}

dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
('Python', '3.11.3'),
('SciPy-bundle', '2023.07'),
('PyTorch-bundle', '2.1.2', versionsuffix),
('scikit-learn', '1.3.1'),
('scikit-image', '0.22.0'),
]

source_urls = ['https://github.com/tbepler/topaz/archive']
sources = [{
'download_filename': '%s.tar.gz' % _commit,
'filename': '%(name)s-%(version)s.tar.gz'
}]
patches = [
'topaz-0.2.5_install_relion3_wrappers.patch',
'topaz-0.2.5.20231120_helical-filament-picking.patch',
'topaz-0.2.5.20231120_update-description.patch',
]
checksums = [
{'topaz-0.2.5.20231120.tar.gz': 'ca0630f9a69622eb3e10c9de310f58ac846e60a5504c4533398a9a75b3091df9'},
{'topaz-0.2.5_install_relion3_wrappers.patch': '0fe23a0ecaf887aaa89641a7e7cf37fafd3134384b0a8f46acb4e17537d1a151'},
{'topaz-0.2.5.20231120_helical-filament-picking.patch':
'320466e4ac1d1f06ba392a419aefa369905baa6877717868cd8ea7135b6bc28f'},
{'topaz-0.2.5.20231120_update-description.patch':
'073241dba2de63e543136a387ff7ef698a5e0139ab3356de021e370338fa57c1'},
]

download_dep_fail = True
use_pip = True
sanity_pip_check = True

options = {'modulename': 'topaz'}

_relion3_wrappers = ['denoise', 'pick', 'train', 'train_denoise']

sanity_check_paths = {
'files': ["bin/run_topaz_%s.py" % x for x in _relion3_wrappers],
'dirs': [],
}
sanity_check_commands = ['run_topaz_%s.py --help' % x for x in _relion3_wrappers] + ['topaz --help']

moduleclass = 'chem'
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
From 4ea1710c88648ff4c5f7232e20ad5d9aa9535241 Mon Sep 17 00:00:00 2001
From: scheres <scheres@mrc-lmb.cam.ac.uk>
Date: Wed, 5 Jan 2022 12:13:10 +0000
Subject: [PATCH] implemented helical filament picking in extract.py

---
topaz/commands/extract.py | 225 ++++++++++++++++++++++++++++++++++++--
1 file changed, 215 insertions(+), 10 deletions(-)

diff --git a/topaz/commands/extract.py b/topaz/commands/extract.py
index 3c3f032..6676618 100644
--- a/topaz/commands/extract.py
+++ b/topaz/commands/extract.py
@@ -52,6 +52,11 @@ def add_arguments(parser):
parser.add_argument('--targets', help='path to file specifying particle coordinates. used to find extraction radius that maximizes the AUPRC')
parser.add_argument('--only-validate', action='store_true', help='flag indicating to only calculate validation metrics. does not report full prediction list')

+ # Filament picking SHWS 30032021
+ parser.add_argument('-f', '--filaments', action='store_true', help='flag for filament start-end picking.')
+ parser.add_argument('-fp', '--filaments_plot', action='store_true', help='flag for filament start-end picking plus plotting of its intermediate stages (useful for tuning parameters).')
+ parser.add_argument('-fl', '--filaments_length', default=-1, type=int, help='minimum length of straight filament segments to be picked (in Angstrom) (default: twice --radius)')
+
parser.add_argument('-d', '--device', default=0, type=int, help='which device to use, <0 corresponds to CPU')

parser.add_argument('-o', '--output', help='file path to write')
@@ -63,24 +68,219 @@ def add_arguments(parser):

return parser

+
+def is_in_between(point, line):
+ dx = line[1][0] - line[0][0]
+ dy = line[1][1] - line[0][1]
+ dotp = (point[0] - line[0][0])*dx + (point[1] - line[0][1])*dy
+ return 0 <= dotp and dotp <= dx*dx + dy*dy
+
+def distance_point_line(point, line):
+ x1=line[0][0]
+ x2=line[1][0]
+ y1=line[0][1]
+ y2=line[1][1]
+ x0=point[0]
+ y0=point[1]
+ dd = abs( (x2-x1)*(y1-y0) - (x1-x0)*(y2-y1) ) / np.sqrt( (x2-x1)*(x2-x1) + (y2-y1)*(y2-y1) )
+ if not is_in_between(point,line):
+ closest = min(distance_point_point(point,line[0]), distance_point_point(point,line[1]))
+ return max(closest, dd)
+ else:
+ return dd
+
+def distance_point_point(point1, point2):
+ x1=point1[0]
+ y1=point1[1]
+ x2=point2[0]
+ y2=point2[1]
+ return np.sqrt((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2))
+
+def angle_line_line(line1, line2):
+ a1 = np.arctan2(line1[1][1]-line1[0][1], line1[1][0]-line1[0][0])
+ a2 = np.arctan2(line2[1][1]-line2[0][1], line2[1][0]-line2[0][0])
+ return abs(a2-a1)
+
+def prune_lines(inlines, mind, min_ang, max_merge):
+ import math
+ lengths = []
+ merged = []
+ lines = np.asarray(inlines)
+ for line in lines:
+ lengths = np.append(lengths, np.linalg.norm(line[0]-line[1]))
+ merged = np.append(merged, 1)
+ sortidx = np.argsort(lengths);
+
+ min_ang_rad = math.radians(min_ang)
+ NN = len(sortidx)
+ idx = NN - 1
+ while idx >= 0:
+ i1 = sortidx[idx]
+
+ for i2 in range(NN):
+ if (i1 != i2 and lengths[i2] > 0. and lengths[i1] > 0.):
+
+ d11 = distance_point_line(lines[i1][0],lines[i2])
+ d12 = distance_point_line(lines[i1][1],lines[i2])
+ d21 = distance_point_line(lines[i2][0],lines[i1])
+ d22 = distance_point_line(lines[i2][1],lines[i1])
+ ang = angle_line_line(lines[i1], lines[i2])
+ sum = 0
+ if (d11 < mind):
+ sum = sum + 1
+ if (d12 < mind):
+ sum = sum + 1
+ if (d21 < mind):
+ sum = sum + 1
+ if (d22 < mind):
+ sum = sum + 1
+
+ # merge lines if they haven't been merged too often already, they overlap two or more points and are parallel
+ if ( (merged[i1] + merged[i2] < max_merge) and (sum >= 2) and (ang < min_ang_rad or abs(ang-math.pi) < min_ang_rad) ):
+
+ # select the two points with the furthest distance
+ maxd=0
+ for i in range(2):
+ for j in range(2):
+ d = distance_point_point(lines[i1][i], lines[i2][j])
+ if (d > maxd):
+ maxd = d
+ mymax0 = lines[i1][i]
+ mymax1 = lines[i2][j]
+ # perhaps original one was longer?
+ if maxd > lengths[i1]:
+ lines[i1][0] = mymax0
+ lines[i1][1] = mymax1
+ lengths[i1] = np.linalg.norm(lines[i1][0]-lines[i1][1])
+ merged[i1] = merged[i1] + merged[i2]
+
+ lines[i2][0][0] = -9999
+ lines[i2][0][1] = -9999
+ lines[i2][1][0] = -9999
+ lines[i2][1][1] = -9999
+ lengths[i2] = 0
+ merged[i2] = 0
+ idx = idx + 1
+ break
+
+ # remove smaller lines with both points close to longer one
+ elif (d21 < mind and d22 < mind):
+ lines[i2][0][0] = -9999
+ lines[i2][0][1] = -9999
+ lines[i2][1][0] = -9999
+ lines[i2][1][1] = -9999
+ lengths[i2] = 0
+
+ idx = idx - 1
+
+ return lines
+
+
+def pick_filaments(score, radius, threshold, filaments_length, filaments_plot):
+ from topaz import mrc
+ from skimage.filters import gaussian
+ from skimage.transform import probabilistic_hough_line
+ from skimage.morphology import skeletonize
+ import math
+
+ #Parameters
+ thr = round(0.1*filaments_length)
+ line_length = filaments_length
+ gap = radius
+ mind= radius
+ min_angle = 10
+ max_merge = 5
+
+ bin_score = (gaussian(score, 3) > threshold)
+ edges = skeletonize(bin_score)
+ houghs = probabilistic_hough_line(edges, threshold=thr, line_length=line_length, line_gap=gap)
+ lines = prune_lines(houghs, mind=mind, min_ang=min_angle, max_merge=max_merge)
+
+ if filaments_plot:
+ import matplotlib.pyplot as plt
+ from matplotlib import cm
+ fig, axes = plt.subplots(1, 4, figsize=(15, 5), sharex=True, sharey=True)
+ ax = axes.ravel()
+
+ ax[0].imshow(score, cmap=cm.binary, vmin=-20, vmax=5)
+ ax[0].imshow(bin_score, alpha=0.5, cmap=cm.Reds)
+ ax[0].set_title('FOM [-20,5] thr= ' + str(threshold))
+
+ ax[1].imshow(edges, cmap=cm.gray)
+ ax[1].set_title('Skeletonize')
+
+ ax[2].imshow(edges * 0)
+ for hough in houghs:
+ p0, p1 = hough
+ ax[2].plot((p0[0], p1[0]), (p0[1], p1[1]))
+ ax[2].set_xlim((0, score.shape[1]))
+ ax[2].set_ylim((score.shape[0], 0))
+ ax[2].set_title('Hough transform; len= ' + str(line_length) + ' gap= ' + str(gap))
+
+ ax[3].imshow(edges * 0)
+ for line in lines:
+ p0, p1 = line
+ ax[3].plot((p0[0], p1[0]), (p0[1], p1[1]))
+ ax[3].set_xlim((0, score.shape[1]))
+ ax[3].set_ylim((score.shape[0], 0))
+ ax[3].set_title('Prune: mind= ' + str(mind))
+
+ for a in ax:
+ a.set_axis_off()
+
+ plt.tight_layout()
+ plt.show()
+
+ oldlines=lines
+ # Flatten lines into 2D array, as rest of topaz
+ NN=len(lines)
+ if NN>0:
+ lines = lines.reshape(2*NN, 2)
+ # Remove -9999 coordinates
+ newlines = []
+ for i in range(2*NN):
+ if (lines[i,0] != -9999 and lines[i,1] != -9999):
+ newlines = np.append(newlines, lines[i])
+
+ NN=round(len(newlines)/2)
+ if (NN>0):
+ lines = newlines.reshape(NN, 2)
+ else:
+ lines = []
+
+ # Just set scores to zero, a they are meaningless now
+ scores = np.zeros(NN, dtype=np.float32)
+
+ return scores, lines
+
+
class NonMaximumSuppression:
- def __init__(self, radius, threshold):
+ def __init__(self, radius, threshold, do_filaments=False, filaments_length=150, filaments_plot=False):
self.radius = radius
self.threshold = threshold
+ self.do_filaments = do_filaments
+ self.filaments_length = filaments_length
+ self.filaments_plot = filaments_plot

def __call__(self, args):
name,score = args
- score,coords = non_maximum_suppression(score, self.radius, threshold=self.threshold)
- return name, score, coords
+ if self.do_filaments:
+ score,coords = pick_filaments(score, self.radius, threshold=self.threshold, length=self.filaments_length, filaments_plot = self.filaments_plot )
+ else:
+ score,coords = non_maximum_suppression(score, self.radius, threshold=self.threshold, length=self.filaments_length)
+ return name, core, coords

-def nms_iterator(scores, radius, threshold, pool=None):
- process = NonMaximumSuppression(radius, threshold)
+def nms_iterator(scores, radius, threshold, pool=None, do_filaments=False, filaments_length=150, filaments_plot=False):
+ process = NonMaximumSuppression(radius, threshold, do_filaments, filaments_length, filaments_plot)
if pool is not None:
for name,score,coords in pool.imap_unordered(process, scores):
yield name,score,coords
else:
for name,score in scores:
- score,coords = non_maximum_suppression(score, radius, threshold=threshold)
+ if do_filaments:
+ score,coords = pick_filaments(score, radius, threshold=threshold, filaments_length=filaments_length, filaments_plot=filaments_plot)
+ else:
+ score,coords = non_maximum_suppression(score, radius, threshold=threshold)
yield name,score,coords

def iterate_score_target_pairs(scores, targets):
@@ -231,6 +431,12 @@ def main(args):
if radius is None:
radius = -1

+ do_filaments = args.filaments or args.filaments_plot
+ filaments_length = args.filaments_length
+ if (filaments_length < 0):
+ filaments_length = 2 * radius
+ filaments_plot = args.filaments_plot
+
num_workers = args.num_workers
pool = None
if num_workers < 0:
@@ -284,12 +490,13 @@ def main(args):

if not per_micrograph:
print('image_name\tx_coord\ty_coord\tscore', file=f)
+
## extract coordinates using radius
- for path,score,coords in nms_iterator(stream, radius, threshold, pool=pool):
+ for path,score,coords in nms_iterator(stream, radius, threshold, pool=pool, do_filaments=do_filaments, filaments_length=filaments_length, filaments_plot=filaments_plot):
basename = os.path.basename(path)
name = os.path.splitext(basename)[0]
## scale the coordinates
- if scale != 1:
+ if scale != 1 and len(coords)>0:
coords = np.round(coords*scale).astype(int)

if per_micrograph:
@@ -303,8 +510,6 @@ def main(args):
print(name + '\t' + str(coords[i,0]) + '\t' + str(coords[i,1]) + '\t' + str(score[i]), file=f)


-
-
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser('Script for extracting particles from segmented images or images processed with a trained model. Uses a non maximum suppression algorithm.')
Loading