-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add concat_text component * Minor cleanup * Update src/concat_text/config.vsh.yaml * Update in line with most of Dries' comments * Don't claim to cleanup if it ain't happening... * Add authorship * Avoid using deprecated Viash variable --------- Co-authored-by: Dries Schaumont <[email protected]>
- Loading branch information
1 parent
781f668
commit a2e8c5d
Showing
5 changed files
with
180 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
name: Dries Schaumont | ||
info: | ||
links: | ||
email: [email protected] | ||
github: DriesSchaumont | ||
orcid: "0000-0002-4389-0440" | ||
linkedin: dries-schaumont | ||
organizations: | ||
- name: Data Intuitive | ||
href: https://www.data-intuitive.com | ||
role: Data Scientist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: Toni Verbeiren | ||
info: | ||
links: | ||
github: tverbeiren | ||
linkedin: verbeiren | ||
organizations: | ||
- name: Data Intuitive | ||
href: https://www.data-intuitive.com | ||
role: Data Scientist and CEO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
name: concat_text | ||
description: | | ||
Concatenate a number of text files, handle gzipped text files gracefully and | ||
optionally gzip the output text file. | ||
  This component is useful for concatenating fastq files from different lanes, for instance. | ||
authors: | ||
- __merge__: /src/_authors/toni_verbeiren.yaml | ||
roles: [ author, maintainer ] | ||
- __merge__: /src/_authors/dries_schaumont.yaml | ||
roles: [ reviewer ] | ||
info: | ||
improvements: | | ||
This component could be improved in 2 ways: | ||
1. Allow for a mix of zipped and plain input files | ||
2. Allow to specify a compression algorithm for the output | ||
argument_groups: | ||
- name: Input arguments | ||
arguments: | ||
- name: --input | ||
description: A list of (gzipped) text files. | ||
type: file | ||
multiple: true | ||
required: true | ||
example: input?.txt.gz | ||
- name: Output arguments | ||
arguments: | ||
- name: "--gzip_output" | ||
type: boolean_true | ||
description: Should the output be zipped? | ||
- name: --output | ||
description: File to write the output to, optionally gzipped. | ||
type: file | ||
direction: output | ||
example: output.txt | ||
|
||
resources: | ||
- type: bash_script | ||
path: script.sh | ||
test_resources: | ||
- type: bash_script | ||
path: test.sh | ||
|
||
engines: | ||
- type: docker | ||
image: alpine:latest | ||
setup: | ||
- type: apk | ||
packages: | ||
- bash | ||
- procps | ||
- file | ||
|
||
runners: | ||
- type: executable | ||
- type: nextflow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/usr/bin/env bash

# Concatenate the files listed in $par_input into $par_output.
#
# Variables read but not defined in this script (presumably injected by the
# Viash wrapper -- confirm against the component config):
#   meta_temp_dir    directory under which the scratch directory is created
#   par_input        ';'-separated list of input paths (glob patterns allowed)
#   par_output       path the result is moved to
#   par_gzip_output  the literal string "true" to gzip the result
#
# The inputs are handled as one group: if `file` reports gzip data for any of
# them, all of them are passed to zcat; otherwise all of them are passed to
# cat. A mix of gzipped and plain inputs is not supported.

set -euo pipefail

# Scratch directory, removed again on every exit path by the EXIT trap. It is
# deliberately not called TMPDIR, so the conventional environment variable of
# that name is left untouched for child processes.
scratch_dir=$(mktemp -d "$meta_temp_dir/concat_text-XXXXXX")
function clean_up {
  rm -rf -- "$scratch_dir"
}
trap clean_up EXIT

# Turn the ';'-separated list into an array so that every path stays a single
# word, even when it contains whitespace.
input_files=()
IFS=';' read -r -a input_entries <<< "$par_input"
for entry in "${input_entries[@]}"; do
  [[ -n "$entry" ]] || continue
  if [[ -e "$entry" ]]; then
    input_files+=("$entry")
  else
    # Not an existing path: expand it as a glob pattern. With an empty IFS the
    # unquoted expansion is globbed but not word-split. A pattern without any
    # match stays literal and is reported by cat/zcat further down.
    saved_ifs=$IFS
    IFS=
    # shellcheck disable=SC2206
    glob_matches=($entry)
    IFS=$saved_ifs
    input_files+=("${glob_matches[@]}")
  fi
done

if (( ${#input_files[@]} == 0 )); then
  echo "No input files given" >&2
  exit 1
fi

echo -n ">> Check if input is gzipped... "
# -b keeps the path out of the report, so a path that merely contains "gzip"
# does not count as a match; -L describes the target of a symlink instead of
# the link itself. The report is captured rather than piped into grep -q, so
# no pipeline status needs to be ignored. A failing `file` is tolerated and
# simply yields a report without a match.
file_report=$(file -bL -- "${input_files[@]}") || true
if [[ "$file_report" == *gzip* ]]; then
  is_zipped=true
  echo "yes"
else
  is_zipped=false
  echo "no"
fi

if [[ "$is_zipped" == true ]]; then
  echo ">> zcat gzipped files"
  zcat -- "${input_files[@]}" > "$scratch_dir/contents"
else
  echo ">> cat plain files"
  cat -- "${input_files[@]}" > "$scratch_dir/contents"
fi

if [[ "$par_gzip_output" == true ]]; then
  echo ">> Zip output file"
  # gzip replaces contents with contents.gz inside the scratch directory.
  gzip "$scratch_dir/contents"
  mv -- "$scratch_dir/contents.gz" "$par_output"
else
  mv -- "$scratch_dir/contents" "$par_output"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#!/usr/bin/env bash

# Test script for the concat_text component.
#
# Runs the executable named by $meta_executable (not defined in this script;
# presumably provided by the Viash test runner -- confirm) on plain and
# gzipped inputs, with and without --gzip_output, and checks every result
# against the same expected plain-text content.

set -euo pipefail

EXPECTED_FILE="expected_output.txt"

# Print the failure message given as $1 and abort the test run.
function fail {
  echo "$1"
  exit 1
}

# Abort unless the file $1 exists and is byte-identical to $EXPECTED_FILE.
function assert_plain_output {
  local output_file="$1"
  [[ -f "$output_file" ]] || fail "Output file $output_file not found!"
  cmp -s "$output_file" "$EXPECTED_FILE" \
    || fail "Output file $output_file is not as expected!"
}

# Abort unless the file $1 exists, can be decompressed by zcat and
# decompresses to the content of $EXPECTED_FILE.
#
# The compressed bytes are not compared directly: a gzip header can carry the
# original file name and a timestamp, so two archives with identical content
# are not guaranteed to be byte-identical.
function assert_gzipped_output {
  local output_file="$1"
  local unpacked_file="$output_file.unpacked"
  [[ -f "$output_file" ]] || fail "Output file $output_file not found!"
  zcat "$output_file" > "$unpacked_file" \
    || fail "Output file $output_file is not as expected!"
  cmp -s "$unpacked_file" "$EXPECTED_FILE" \
    || fail "Output file $output_file is not as expected!"
}

echo ">> Creating test input files file[1-3].txt"
INPUT_FILE_1="file1.txt"
INPUT_FILE_2="file2.txt"
INPUT_FILE_3="file3.txt"
echo "one" > "$INPUT_FILE_1"
echo "two" > "$INPUT_FILE_2"
echo "three" > "$INPUT_FILE_3"
echo ">> Created input files"

echo ">> Creating zipped versions at file[1-3].txt.gz"
# -k keeps the plain files for the plain-input runs; -f overwrites archives
# left behind by an earlier run in the same directory.
gzip -kf "$INPUT_FILE_1"
gzip -kf "$INPUT_FILE_2"
gzip -kf "$INPUT_FILE_3"

echo ">> Creating expected output file expected_output.txt"
cat > "$EXPECTED_FILE" <<EOF
one
two
three
EOF

echo ">> Run component on 3 plain input files, plain output"
"$meta_executable" \
  --input "$INPUT_FILE_1;$INPUT_FILE_2;$INPUT_FILE_3" \
  --output "output1.txt"

assert_plain_output "output1.txt"

echo ">> Run component on 3 zipped input files, plain output"
"$meta_executable" \
  --input "$INPUT_FILE_1.gz;$INPUT_FILE_2.gz;$INPUT_FILE_3.gz" \
  --output "output2.txt"

assert_plain_output "output2.txt"

echo ">> Run component on 3 plain input files, zipped output"
"$meta_executable" \
  --input "$INPUT_FILE_1;$INPUT_FILE_2;$INPUT_FILE_3" \
  --output "output3.txt.gz" \
  --gzip_output

assert_gzipped_output "output3.txt.gz"

echo ">> Run component on 3 zipped input files, zipped output"
"$meta_executable" \
  --input "$INPUT_FILE_1.gz;$INPUT_FILE_2.gz;$INPUT_FILE_3.gz" \
  --output "output4.txt.gz" \
  --gzip_output

assert_gzipped_output "output4.txt.gz"

echo ">> Tests done"