-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSConstruct-get-data
93 lines (76 loc) · 2.9 KB
/
SConstruct-get-data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import os
from os import path
import sys

# Require an active virtualenv *before* importing any non-stdlib dependency,
# so that a missing environment is reported with a clear message rather than
# an ImportError raised by one of the imports below. This is why the
# remaining imports intentionally follow the check.
if not os.environ.get('VIRTUAL_ENV'):
    sys.exit('--> an active virtualenv is required')

import SCons
from SCons.Script import (Environment, Variables, Help, Decider, Dir)

from bioscons.fileutils import rename
import common

# TODO: move to bioscons
# Require SCons 3.0.1 or newer.
SCons.Script.EnsureSConsVersion(3, 0, 1)

# check timestamps before calculating md5 checksums
Decider('MD5-timestamp')
# ############### start inputs ################
def _nonblank_lines(text):
    """Return the lines of ``text`` stripped of surrounding whitespace,
    skipping lines that are empty after stripping."""
    return [line.strip() for line in text.splitlines() if line.strip()]


# Settings are read from settings.conf in the current directory.
user_args, conf = common.get_conf('settings.conf')

outdir = conf['get_data'].get('outdir', 'dada2')

# Whitespace-delimited plate names; at least one is required.
plates = conf['get_data'].get('plates', '').split()
if not plates:
    # Explicit check instead of ``assert``, which is skipped under
    # ``python -O`` and would let an empty plate list through silently.
    sys.exit('must provide a value for conf["get_data"]["plates"]')

# One project per line; blank lines are ignored so that they do not turn
# into empty project names.
projects = _nonblank_lines(conf['get_data'].get('projects', ''))

# Optional; None when not configured.
refpkg = conf['get_data'].get('refpkg')

# One sample_information.csv per plate under the dada2-nf output tree.
sample_info = [
    path.join('/fh/fast/fredricks_d/bvdiversity/data/dada2_nf_out',
              plate, 'output', 'sample_information.csv')
    for plate in plates]

singularity = conf['singularity'].get('singularity', 'singularity')
dada2_img = conf['singularity']['dada2']

# One bind path per line, made absolute; blank lines are ignored (an empty
# entry would otherwise resolve to the current working directory).
binds = [os.path.abspath(pth)
         for pth in _nonblank_lines(conf['singularity']['binds'])]
# ############### end inputs ##################
# Build variables. ``out`` is the output directory and defaults to the
# configured ``outdir``. Named ``cli_vars`` rather than ``vars`` so the
# builtin ``vars()`` is not shadowed.
cli_vars = Variables()
cli_vars.Add('out', '', outdir)

# Construction environment. ``$cwd``, ``$singularity`` and ``$binds`` inside
# the values below are SCons construction variables, substituted by SCons
# when a command line is expanded, not by Python here.
env = Environment(
    ENV=os.environ,
    variables=cli_vars,
    SHELL='bash',
    cwd=os.getcwd(),
    singularity=singularity,
    # one ``-B <path>`` option for the working directory and each bind path
    binds=' '.join('-B {}'.format(pth) for pth in ['$cwd'] + binds),
    # command prefix for running a program inside the dada2 image
    dada2_img='$singularity exec $binds --pwd $cwd {}'.format(dada2_img),
)

Help(cli_vars.GenerateHelpText(env))

# ``$projects`` is only defined when projects were configured; each project
# name is double-quoted for the shell.
if projects:
    env['projects'] = '--projects ' + ' '.join(
        '"{}"'.format(p) for p in projects)
# Run bin/gather_seqtabs.py over the per-plate sample information files,
# producing $out/sample_info.csv and $out/seqtabs.txt. ``sample_info`` is
# rebound here from the list of input paths to the first target node.
# ``$projects`` is set elsewhere in this file only when projects are
# configured; otherwise SCons expands it to an empty string.
sample_info, seqtabs = env.Command(
    ['$out/sample_info.csv', '$out/seqtabs.txt'],
    sample_info,
    ' '.join([
        'bin/gather_seqtabs.py $SOURCES',
        '--sample-info ${TARGETS[0]}',
        '--seqtabs ${TARGETS[1]}',
        '$projects',
    ]),
)
# Run write_seqs.py on the list of seqtabs. Each output option is paired
# with the target at the matching index in the target list below.
sv_fa, sv_table, weight, specimen_map, sv_table_long, specimen_table = env.Command(
    target=[
        '$out/seqs.fasta',
        '$out/dada2_sv_table.csv',
        '$out/weights.csv',
        '$out/specimen_map.csv',
        '$out/dada2_sv_table_long.csv',
        '$out/specimen_table.csv',
    ],
    source=seqtabs,
    action=' '.join([
        '/fh/fast/fredricks_d/bvdiversity/data/dada2-nf/bin/write_seqs.py',
        '--seqtablist $SOURCE',
        '--seqs ${TARGETS[0]}',
        '--sv-table ${TARGETS[1]}',
        '--weights ${TARGETS[2]}',
        '--specimen-map ${TARGETS[3]}',
        '--sv-table-long ${TARGETS[4]}',
        '--specimen-table ${TARGETS[5]}',
        '--direction merged',
        '-j 20',
    ]),
)
# When a reference package is configured, copy it recursively to a directory
# target whose path is derived from ``refpkg`` by ``rename(..., pth='.')``
# (presumably the same name placed in the current directory; confirm
# against bioscons).
if refpkg:
    local_refpkg = env.Command(
        Dir(rename(refpkg, pth='.')),
        refpkg,
        'cp -r $SOURCE $TARGET',
    )