Skip to content

Commit

Permalink
Experimental/bruker report (#2)
Browse files Browse the repository at this point in the history
* added report info
* split decompression step
* Included ms1 TIC/BPC
* added new data to report channel
* added convert_dotd to the schema
* fixed bug where passed mass accuracies were bypassed
* code formatting
  • Loading branch information
jspaezp authored Aug 17, 2023
1 parent 454b911 commit 85f3060
Show file tree
Hide file tree
Showing 17 changed files with 567 additions and 74 deletions.
37 changes: 37 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,43 @@ custom_logo: "./nf-core-quantms_logo_light.png"
custom_logo_url: "https://github.com/bigbio/quantms"
custom_logo_title: "quantms"

custom_data:
total_ion_chromatograms:
file_format: 'tsv'
section_name: 'MS1 TIC'
description: 'MS1 total ion chromatograms extracted from the .d files'
plot_type: 'linegraph'
pconfig:
id: 'ms1_tic'
title: 'MS1 TIC'
ylab: 'Ion Count'
ymin: 0
base_peak_chromatograms:
file_format: 'tsv'
section_name: 'MS1 BPC'
description: 'MS1 base peak chromatograms extracted from the .d files'
plot_type: 'linegraph'
pconfig:
id: 'ms1_bpc'
title: 'MS1 BPC'
ylab: 'Ion Count'
ymin: 0
number_of_peaks:
file_format: 'tsv'
section_name: 'MS1 Peaks'
description: 'MS1 Peaks from the .d files'
plot_type: 'linegraph'
pconfig:
id: 'ms1_peaks'
title: 'MS1 Peaks'
ylab: 'Peak Count'
ymin: 0
sp:
total_ion_chromatograms:
fn: 'tic_*'
base_peak_chromatograms:
fn: 'bpc_*'
number_of_peaks:
fn: 'ms1_peaks_*'
quantms/exp_design:
fn: "*_design.tsv"
242 changes: 242 additions & 0 deletions bin/dotd_2_mqc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
#!/usr/bin/env python
GENERAL_HELP = """
Converts .d files to multiqc compatible files.
Generates the following files:
- tic_<basename>.tsv
- bpc_<basename>.tsv
- ms1_peaks_<basename>.tsv
- general_stats_<basename>.tsv
- dotd_mqc.yml
Usage:
$ python dotd_2_mqc.py <input> <output>
$ cd <output>
$ multiqc -c dotd_mqc.yml .
"""

from typing import List, Tuple # noqa: E402
import os # noqa: E402
import sqlite3 # noqa: E402
import argparse # noqa: E402
from pathlib import Path # noqa: E402
from dataclasses import dataclass # noqa: E402
from logging import getLogger # noqa: E402

VERSION = "0.0.1"
logger = getLogger(__name__)

MQC_YML = """
custom_data:
total_ion_chromatograms:
file_format: 'tsv'
section_name: 'MS1 TIC'
description: 'MS1 total ion chromatograms extracted from the .d files'
plot_type: 'linegraph'
pconfig:
id: 'ms1_tic'
title: 'MS1 TIC'
ylab: 'Ion Count'
ymin: 0
base_peak_chromatograms:
file_format: 'tsv'
section_name: 'MS1 BPC'
description: 'MS1 base peak chromatograms extracted from the .d files'
plot_type: 'linegraph'
pconfig:
id: 'ms1_bpc'
title: 'MS1 BPC'
ylab: 'Ion Count'
ymin: 0
number_of_peaks:
file_format: 'tsv'
section_name: 'MS1 Peaks'
description: 'MS1 Peaks from the .d files'
plot_type: 'linegraph'
pconfig:
id: 'ms1_peaks'
title: 'MS1 Peaks'
ylab: 'Peak Count'
ymin: 0
sp:
total_ion_chromatograms:
fn: 'tic_*'
base_peak_chromatograms:
fn: 'bpc_*'
number_of_peaks:
fn: 'ms1_peaks_*'
general_stats:
fn: 'general_stats_*'
"""


@dataclass
class DotDFile:
    """Accessor for a Bruker .d directory.

    Reads chromatogram and metadata information from the ``analysis.tdf``
    SQLite database inside the directory and writes MultiQC-compatible
    two-column TSV tables.
    """

    # Path to the .d directory (the directory itself, not analysis.tdf).
    filepath: os.PathLike

    @property
    def sql_filepath(self) -> Path:
        """Path to the ``analysis.tdf`` SQLite database inside the .d dir."""
        return Path(self.filepath) / "analysis.tdf"

    @property
    def basename(self) -> str:
        """Stem of the .d directory; used to name the output tables."""
        return Path(self.filepath).stem

    def _query(self, cmd: str) -> List[tuple]:
        """Run a read-only query against ``analysis.tdf`` and return all rows.

        The connection is closed even when the query raises; the previous
        per-property code leaked the connection on error.
        """
        conn = sqlite3.connect(self.sql_filepath)
        try:
            return conn.cursor().execute(cmd).fetchall()
        finally:
            conn.close()

    @property
    def ms1_tic(self) -> List[Tuple[float, float]]:
        """Gets the MS1 total-ion-chromatogram.

        Returns:
            List[Tuple[float, float]]: List of (time, intensity) tuples,
            with times binned to whole seconds and intensities averaged
            within each bin.
        """
        cmd = """
        SELECT CAST(Time AS INTEGER), AVG(SummedIntensities)
        FROM frames WHERE MsMsType = '0'
        GROUP BY CAST(Time AS INTEGER)
        ORDER BY Time
        """
        return self._query(cmd)

    @property
    def ms1_bpc(self) -> List[Tuple[float, float]]:
        """Gets the MS1 base-peak-chromatogram.

        Returns:
            List[Tuple[float, float]]: List of (time, intensity) tuples,
            keeping the maximum peak intensity within each 1-second bin.
        """
        cmd = """
        SELECT CAST(Time AS INTEGER), MAX(MaxIntensity)
        FROM frames WHERE MsMsType = '0'
        GROUP BY CAST(Time AS INTEGER)
        ORDER BY Time
        """
        return self._query(cmd)

    @property
    def ms1_peaks(self) -> List[Tuple[float, float]]:
        """Gets the number of MS1 peaks.

        Returns:
            List[Tuple[float, float]]: List of (time, mean peak count)
            tuples, averaged within each 1-second bin.
        """
        cmd = """
        SELECT CAST(Time AS INTEGER), AVG(NumPeaks)
        FROM frames WHERE MsMsType = '0'
        GROUP BY CAST(Time AS INTEGER)
        ORDER BY Time
        """
        return self._query(cmd)

    def get_acquisition_datetime(self) -> str:
        """Gets the acquisition datetime.

        Returns
        -------
        str
            The acquisition datetime in ISO 8601 format, e.g.
            '2023-08-06T06:23:19.141-08:00'.

        Raises
        ------
        RuntimeError
            If the metadata table does not contain exactly one
            AcquisitionDateTime entry.
        """
        cmd = "SELECT Value FROM GlobalMetadata WHERE key='AcquisitionDateTime'"
        out = self._query(cmd)
        if len(out) != 1:
            # Fixed: the old message claimed "more than one" even when zero
            # entries were found.
            raise RuntimeError(
                f"Expected exactly one acquisition datetime, found {len(out)}."
            )
        return out[0][0]

    def get_general_stats(self) -> dict:
        """Gets the general stats from the .d file.

        Returns
        -------
        dict
            A dictionary of general stats (currently the acquisition
            datetime; ``write_tables`` adds the total ion current).
        """
        return {
            "AcquisitionDateTime": self.get_acquisition_datetime(),
        }

    @staticmethod
    def _write_tsv(path: Path, rows) -> None:
        """Write an iterable of (key, value) pairs as a two-column TSV."""
        with path.open("w") as f:
            for k, v in rows:
                f.write(f"{k}\t{v}\n")

    def write_tables(self, location) -> None:
        """Write TIC/BPC/peak-count and general-stats TSVs into *location*.

        Parameters
        ----------
        location : os.PathLike
            Output directory; created (with parents) if missing.
        """
        logger.info(f"Writing tables for {self.basename}")
        logger.info(f"Writing tables to {location}")
        location = Path(location)
        location.mkdir(parents=True, exist_ok=True)
        tic = self.ms1_tic
        bpc = self.ms1_bpc
        npeaks = self.ms1_peaks
        general_stats = self.get_general_stats()
        # Total ion current accumulated over the whole MS1 acquisition.
        general_stats["TotCurrent"] = sum(i for _, i in tic)

        tic_path = location / f"tic_{self.basename}.tsv"
        bpc_path = location / f"bpc_{self.basename}.tsv"
        peaks_location = location / f"ms1_peaks_{self.basename}.tsv"
        general_stats_location = location / f"general_stats_{self.basename}.tsv"

        for path, rows in (
            (tic_path, tic),
            (bpc_path, bpc),
            (peaks_location, npeaks),
            (general_stats_location, general_stats.items()),
        ):
            logger.info(f"Writing {path}")
            self._write_tsv(path, rows)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(add_help=True, usage=GENERAL_HELP)
    parser.add_argument("input", help="Input .d file or directory of .d files.")
    parser.add_argument("output", help="Output directory.")
    parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}")

    args, unkargs = parser.parse_known_args()

    if unkargs:
        # Fail loudly on typos instead of silently ignoring extra flags.
        print(f"Unknown arguments: {unkargs}")
        raise RuntimeError("Unknown arguments.")

    input_path = Path(args.input)
    output_path = Path(args.output)

    # A Bruker .d "file" is itself a directory: accept either a single
    # .d directory or a directory that contains .d directories.
    if input_path.is_dir() and str(input_path).endswith(".d"):
        input_files = [input_path]
    elif input_path.is_dir():
        input_files = list(input_path.glob("*.d"))
    else:
        # Fixed: the old message said "not a file or directory", but this
        # branch is reached exactly for plain files and missing paths.
        raise RuntimeError(
            f"Input path {input_path} is not a .d directory "
            "or a directory containing .d files."
        )

    output_path.mkdir(parents=True, exist_ok=True)

    for f in input_files:
        d = DotDFile(f)
        d.write_tables(output_path)

    # Drop the MultiQC config next to the tables so `multiqc .` picks
    # everything up.
    logger.info(f"Writing {output_path / 'dotd_mqc.yml'}")
    (output_path / "dotd_mqc.yml").write_text(MQC_YML)
16 changes: 12 additions & 4 deletions modules/local/assemble_empirical_library/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,21 @@ process ASSEMBLE_EMPIRICAL_LIBRARY {

script:
def args = task.ext.args ?: ''
mass_acc_ms1 = meta.precursor_mass_tolerance_unit == "ppm" ? meta.precursor_mass_tolerance : 5
mass_acc_ms2 = meta.fragment_mass_tolerance_unit == "ppm" ? meta.fragment_mass_tolerance : 13
mass_acc_ms1 = meta['precursormasstoleranceunit'].toLowerCase().endsWith('ppm') ? meta['precursormasstolerance'] : 5
mass_acc_ms2 = meta['fragmentmasstoleranceunit'].toLowerCase().endsWith('ppm') ? meta['fragmentmasstolerance'] : 13

mass_acc = params.mass_acc_automatic ? "--quick-mass-acc --individual-mass-acc" : "--mass-acc $mass_acc_ms2 --mass-acc-ms1 $mass_acc_ms1"
scan_window = params.scan_window_automatic ? "--individual-windows" : "--window $params.scan_window"
if (params.mass_acc_automatic) {
mass_acc = "--quick-mass-acc --individual-mass-acc"
} else {
mass_acc = "--mass-acc $mass_acc_ms2 --mass-acc-ms1 $mass_acc_ms1"
}
scan_window = params.scan_window_automatic ? '--individual-windows' : "--window $params.scan_window"

"""
# Precursor Tolerance value was: ${meta['precursormasstolerance']}
# Fragment Tolerance value was: ${meta['fragmentmasstolerance']}
# Precursor Tolerance unit was: ${meta['precursormasstoleranceunit']}
# Fragment Tolerance unit was: ${meta['fragmentmasstoleranceunit']}
ls -lcth
Expand Down
71 changes: 71 additions & 0 deletions modules/local/decompress_dotd/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

// Unpacks a compressed Bruker .d archive (.gz / .tar / .tar.gz) so that
// downstream steps can read the raw .d directory.
process DECOMPRESS {
    tag "$meta.mzml_id"
    label 'process_low'
    label 'process_single'
    label 'error_retry'

    container 'continuumio/miniconda3:23.5.2-0-alpine'

    // Escalate the staging strategy with each retry attempt: start with
    // the cheap symlink/hardlink, fall back to a full copy by attempt 3.
    // NOTE(review): `executor` is referenced bare inside this closure —
    // confirm it resolves in this scope (elsewhere this is typically
    // spelled `task.executor`).
    stageInMode {
        if (task.attempt == 1) {
            if (executor == "awsbatch") {
                'symlink'
            } else {
                'link'
            }
        } else if (task.attempt == 2) {
            if (executor == "awsbatch") {
                'copy'
            } else {
                'symlink'
            }
        } else {
            'copy'
        }
    }

    input:
    tuple val(meta), path(compressed_file)

    output:
    // The unpacked .d directory, renamed below to the archive's stem.
    tuple val(meta), path("*.d"), emit: decompressed_files
    path "versions.yml", emit: version
    path "*.log", emit: log

    script:
    def prefix = task.ext.prefix ?: "${meta.mzml_id}"

    // NOTE(review): the log file names mix `${prefix}` and
    // `${compressed_file.baseName}`, and the extract step appends to
    // `..._conversion.log` while the other steps use
    // `..._decompression.log` — confirm whether that split is intended.
    // NOTE(review): `grep -oE "\\d+..."` relies on `\d` inside an ERE,
    // which POSIX grep does not guarantee — verify it works with the
    // grep shipped in this alpine-based container.
    """
    function extract {
      if [ -z "\$1" ]; then
        echo "Usage: extract <path/file_name>.<gz|tar|tar.bz2>"
      else
        if [ -f \$1 ]; then
          case \$1 in
            *.tar.gz) tar xvzf \$1 ;;
            *.gz) gunzip \$1 ;;
            *.tar) tar xvf \$1 ;;
            *) echo "extract: '\$1' - unknown archive method" ;;
          esac
        else
          echo "\$1 - file does not exist"
        fi
      fi
    }
    tar --help 2>&1 | tee -a ${prefix}_decompression.log
    gunzip --help 2>&1 | tee -a ${prefix}_decompression.log
    echo "Unpacking..." | tee -a ${compressed_file.baseName}_decompression.log
    extract ${compressed_file} 2>&1 | tee -a ${compressed_file.baseName}_conversion.log
    mv *.d ${file(compressed_file.baseName).baseName}.d
    ls -l | tee -a ${compressed_file.baseName}_decompression.log
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gunzip: \$(gunzip --help 2>&1 | head -1 | grep -oE "\\d+\\.\\d+(\\.\\d+)?")
        tar: \$(tar --help 2>&1 | head -1 | grep -oE "\\d+\\.\\d+(\\.\\d+)?")
    END_VERSIONS
    """
}
Loading

0 comments on commit 85f3060

Please sign in to comment.