Skip to content

Commit

Permalink
Merge branch 'iss77' into dev-merge
Browse files (browse the repository at this point in the history)
  • Loading branch information
arnikz committed Jan 25, 2021
2 parents 3a9be2d + 02e22f0 commit 2e2d085
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 38 deletions.
10 changes: 5 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ env:
- PREFIX=gtcg/xenon
- BAM=data/test.bam
- SEQIDS="12,22"
- SVTYPES="INV,DEL,INS,INV,DUP,CTX"
matrix:
- PORT=10022 IMG=gridengine TAG=6 SCH=local
- PORT=10023 IMG=gridengine TAG=6 SCH=gridengine
- PORT=10024 IMG=slurm TAG=19 SCH=slurm
- PORT=10024 IMG=gridengine TAG=6 SCH=local
- PORT=10025 IMG=gridengine TAG=6 SCH=gridengine
- PORT=10026 IMG=slurm TAG=19 SCH=slurm
install:
- docker run -d -p $PORT:22 --name $SCH $PREFIX-$IMG:$TAG
- sleep 10
Expand All @@ -28,5 +29,4 @@ install:
- docker exec -t $SCH chown -R xenon:xenon /home/xenon
- docker exec -u xenon -t $SCH ./install.sh
script:
- docker exec -u xenon -t $SCH ./run.sh $SCH $BAM $SEQIDS
- sonar-scanner
- docker exec -u xenon -t $SCH ./run.sh $SCH $BAM $SEQIDS $SVTYPES
26 changes: 14 additions & 12 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@
set -xe

# check input arg(s)
if [ $# -ne "3" ]; then
echo "Usage: $0 [SCHEDULER {local,gridengine,slurm}] [BAM file] [SEQID1,2,...]"
if [ $# -ne "4" ]; then
echo "Usage: $0 [SCHEDULER] [BAM file] [SEQIDS] [SVTYPES]"
exit 1
fi

# set variables
SCH=$1 # scheduler type
SCH=$1 # scheduler types: local, gridengine or slurm
BAM="$(realpath -s "$2")"
BASE_DIR="$(dirname "$BAM")"
SAMPLE="$(basename "$BAM" .bam)"
SEQ_IDS=$3
SV_TYPES=(DEL) # INS INV DUP CTX)
SEQ_IDS_CSV=$3 # e.g., chromosomes: 1,2,...X,Y
SV_TYPES_CSV=$4 # e.g., SV types: DEL,INS,INV,DUP,CTX
SEQ_IDS=($(echo "$SEQ_IDS_CSV" | tr ',' ' '))
SV_TYPES=($(echo "$SV_TYPES_CSV" | tr ',' ' '))
SV_CALLS=(split_reads gridss) # manta delly lumpy)
CV_MODES=(kfold chrom) # cross validation modes
KMERS=19
Expand Down Expand Up @@ -119,7 +121,7 @@ cd ../genome_wide
p=clipped_reads
cmd="python $p.py \
-b \"$BAM\" \
-c \"$SEQ_IDS\" \
-c \"$SEQ_IDS_CSV\" \
-o $p.json.gz \
-p . \
-l $p.log"
Expand All @@ -129,7 +131,7 @@ JOBS+=($JOB_ID)
p=clipped_read_pos
cmd="python $p.py \
-b \"$BAM\" \
-c \"$SEQ_IDS\" \
-c \"$SEQ_IDS_CSV\" \
-o $p.json.gz \
-p . \
-l $p.log"
Expand All @@ -139,15 +141,15 @@ JOBS+=($JOB_ID)
p=split_reads
cmd="python $p.py \
-b \"$BAM\" \
-c \"$SEQ_IDS\" \
-c \"$SEQ_IDS_CSV\" \
-o $p.json.gz \
-ob $p.bedpe.gz \
-p . \
-l $p.log"
JOB_ID=$(submit "$cmd" "$p")
JOBS+=($JOB_ID)

for s in $(echo "$SEQ_IDS" | tr ',' ' '); do # per chromosome
for s in "${SEQ_IDS[@]}"; do # per chromosome
p=clipped_read_distance
cmd="python $p.py \
-b \"$BAM\" \
Expand Down Expand Up @@ -183,7 +185,7 @@ done
waiting

# generate chromosome arrays from the channels as well as label window pairs
for s in $(echo "$SEQ_IDS" | tr ',' ' '); do
for s in "${SEQ_IDS[@]}"; do
p=chr_array
cmd="python $p.py \
-b \"$BAM\" \
Expand Down Expand Up @@ -228,7 +230,7 @@ for c in "${SV_CALLS[@]}"; do
lb="$out/labels.json.gz"
cmd="python $p.py \
-b \"$BAM\" \
-c \"$SEQ_IDS\" \
-c \"$SEQ_IDS_CSV\" \
-lb \"$lb\" -ca . \
-w $WIN_SZ \
-p \"$out\" \
Expand Down Expand Up @@ -328,7 +330,7 @@ waiting
# done
# done

waiting
# waiting

ETIME=$(date +%s)
echo "Processing ${#JOBS[@]} jobs took $((ETIME - STIME)) sec to complete."
Expand Down
30 changes: 12 additions & 18 deletions scripts/genome_wide/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,14 +260,6 @@ def get_chr_list():
return chrlist


# def get_chr_list():
#
# chrlist = ['chr' + str(c) for c in list(np.arange(1, 23))]
# chrlist.extend(['chrX', 'chrY'])
#
# return chrlist


def get_chr_len_dict(ibam):
chr_list = get_chr_list()
# check if the BAM file exists
Expand Down Expand Up @@ -347,8 +339,9 @@ def get_filepath(vec_type):
return os.path.join(output_dir, vec_type, vec_type + '.json.gz')

logging.info('Loading SR positions')
chrlist = get_chr_list()
chr_list = chrlist # if sampleName != 'T1' else ['17']

total_reads_coord_min_support = []
chr_list = get_chr_list()

with gzip.GzipFile(get_filepath('split_reads'), 'rb') as fin:
positions_with_min_support_ls, positions_with_min_support_rs, total_reads_coord_min_support_json, \
Expand All @@ -359,14 +352,15 @@ def get_filepath(vec_type):
left_clipped_pos_cnt, right_clipped_pos_cnt = json.loads(
fin.read().decode('utf-8'))

if svtype == 'DEL':
total_reads_coord_min_support = total_reads_coord_min_support_json['DEL'] + \
total_reads_coord_min_support_json['INDEL_DEL']
elif svtype == 'INS':
total_reads_coord_min_support = total_reads_coord_min_support_json['INS'] + \
total_reads_coord_min_support_json['INDEL_INS']
else:
total_reads_coord_min_support = total_reads_coord_min_support_json[svtype]
if svtype in total_reads_coord_min_support_json:
if svtype in ('DEL', 'INDEL_DEL'):
total_reads_coord_min_support = total_reads_coord_min_support_json['DEL'] + \
total_reads_coord_min_support_json['INDEL_DEL']
elif svtype in ('INS', 'INDEL_INS'):
total_reads_coord_min_support = total_reads_coord_min_support_json['INS'] + \
total_reads_coord_min_support_json['INDEL_INS']
else:
total_reads_coord_min_support = total_reads_coord_min_support_json[svtype]

locations_sr = dict()
locations_cr_r = dict()
Expand Down
10 changes: 7 additions & 3 deletions scripts/genome_wide/label_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,13 @@ def overlap(svtype, sv_list, cpos_list, win_hlen, ground_truth, outDir):
labels[pos_id] = 'no' + svtype

logging.info(Counter(labels.values()))
sv_coverage = int(len(sv_covered) / len(sv_list) * 100)
logging.info("SV coverage: %d/%d=%d" %
(len(sv_covered), len(sv_list), sv_coverage))

try:
sv_coverage = int(len(sv_covered) / len(sv_list) * 100)
except ZeroDivisionError:
sv_coverage = 0
logging.info("SV coverage: %d%%" % sv_coverage)

filename, file_extension = os.path.splitext(ground_truth)

if file_extension == '.bedpe':
Expand Down

0 comments on commit 2e2d085

Please sign in to comment.