fix: xlsx file export issues (#1504) #1505

Merged 1 commit on Apr 3, 2024
1 change: 1 addition & 0 deletions Pipfile
@@ -56,6 +56,7 @@ s3fs = "*"
toml = "*"
black = "*"
drf-spectacular = {extras = ["sidecar"], version = "*"}
+rich = "*"

[dev-packages]
# packages for testing
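The new rich dependency backs the console status spinners and log lines used by the utils/projectwide_query.py helper added below. A minimal standalone sketch of that pattern (not part of this diff):

from rich.console import Console

console = Console()
with console.status("[bold green]Working ..."):
    console.log("step [bold green]done[/bold green]")  # logged lines persist above the spinner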
619 changes: 297 additions & 322 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion utils/kickoff_orphaned_annotation_query.py
@@ -16,7 +16,7 @@
#: Endpoint to smallvariant list-create query
ENDPOINT_SM_LISTCREATE_QUERY = "/variants/api/query/list-create/{case_uuid}"
#: Endpoint to smallvariant retrieve query
-ENDPOINT_SM_RETRIEVE_QUERY = "/svs/api/query/retrieve-update-destroy/{query_uuid}/"
+ENDPOINT_SM_RETRIEVE_QUERY = "/variants/api/query/retrieve-update-destroy/{query_uuid}/"
#: Endpoint to structuralvariant query settings
ENDPOINT_SV_SETTINGS = "/svs/ajax/query-case/query-settings-shortcut/{case_uuid}/?quick_preset=whole_genome&genotype_criteria=default&inheritance=any&frequency=any&impact=any&sv_type=any"
#: Endpoint to structuralvariant list-create query
294 changes: 294 additions & 0 deletions utils/projectwide_query.py
@@ -0,0 +1,294 @@
import json
import os
import sys
import time

import click
import requests
from rich.console import Console
import toml

#: Paths to search the global configuration in.
GLOBAL_CONFIG_PATHS = ("~/.varfishrc.toml",)

#: Endpoint to generate export tsv file
ENDPOINT_DOWNLOAD_GENERATE_TSV = "/variants/api/query-case/download/generate/tsv/{query_uuid}"
#: Endpoint to generate export vcf file
ENDPOINT_DOWNLOAD_GENERATE_VCF = "/variants/api/query-case/download/generate/vcf/{query_uuid}"
#: Endpoint to generate export xlsx file
ENDPOINT_DOWNLOAD_GENERATE_XLSX = "/variants/api/query-case/download/generate/xlsx/{query_uuid}"
#: Endpoint to download file
ENDPOINT_DOWNLOAD_SERVE = "/variants/api/query-case/download/serve/{sodar_uuid}"
#: Endpoint to check download status
ENDPOINT_DOWNLOAD_STATUS = "/variants/api/query-case/download/status/{sodar_uuid}"
#: Endpoint to case list
ENDPOINT_CASE_LIST = "/cases/api/case/list/{project_uuid}"
#: Endpoint to smallvariant query settings
ENDPOINT_SM_SETTINGS = "/variants/api/query-case/query-settings-shortcut/{case_uuid}"
#: Endpoint to smallvariant list-create query
ENDPOINT_SM_LISTCREATE_QUERY = "/variants/api/query/list-create/{case_uuid}"
#: Endpoint to smallvariant retrieve query
ENDPOINT_SM_RETRIEVE_QUERY = "/variants/api/query/retrieve-update-destroy/{query_uuid}/"
#: Endpoint to structuralvariant query settings
ENDPOINT_SV_SETTINGS = "/svs/ajax/query-case/query-settings-shortcut/{case_uuid}/?quick_preset=whole_genome&genotype_criteria=default&inheritance=any&frequency=any&impact=any&sv_type=any"
#: Endpoint to structuralvariant list-create query
ENDPOINT_SV_LISTCREATE_QUERY = "/svs/ajax/sv-query/list-create/{case_uuid}/"
#: Endpoint to structuralvariant retrieve query
ENDPOINT_SV_RETRIEVE_QUERY = "/svs/ajax/sv-query/retrieve-update-destroy/{query_uuid}/"

#: Sleep time between starting queries
SLEEP_RUN = 2
#: Sleep time between polling queries
SLEEP_POLL = 5

#: SQL database name for structural variants
SQL_SV_DB = "svs_structuralvariant"
#: SQL database name for small variants
SQL_SM_DB = "variants_smallvariant"
#: SQL query template to retrieve variants
SQL_QUERY_TEMPLATE = "select * from {db} left outer join variants_case on variants_case.id={db}.case_id where variants_case.sodar_uuid='{case_uuid}' and {db}.release='GRCh37' and chromosome='{chromosome}' and start={start};\n"


console = Console()


def connect_endpoint(config, endpoint, data=None):
    token = config.get("global", {}).get("varfish_api_token")
    base = config.get("global", {}).get("varfish_server_url").rstrip("/")
    url = f"{base}{endpoint}"
    headers = {"Authorization": f"Token {token}"}
    if data:
        response = requests.post(url, headers=headers, json=data)
    else:
        response = requests.get(url, headers=headers)
    if response.status_code not in (
        200,
        201,
    ):
        sys.stderr.write(f"Error: {url} responded with {response.status_code}\n")
        return {}
    return response


def read_toml():
    for config_path in GLOBAL_CONFIG_PATHS:
        config_path = os.path.expanduser(os.path.expandvars(config_path))
        if os.path.exists(config_path):
            with open(config_path, "rt") as tomlf:
                return toml.load(tomlf)
    else:
        sys.stderr.write(
            f"Could not find any of the global configuration files {GLOBAL_CONFIG_PATHS}"
        )
        sys.exit()


def run_query(
    config,
    settings_endpoint,
    query_endpoint,
    case_uuid,
    gene,
    region,
    quick_preset,
    inheritance,
    frequency,
    impact,
    verbose,
):
    url = settings_endpoint.format(case_uuid=case_uuid)
    url += f"?quick_preset={quick_preset}"
    if inheritance:
        url += f"&inheritance={inheritance}"
    if frequency:
        url += f"&frequency={frequency}"
    if impact:
        url += f"&impact={impact}"
    if verbose:
        console.log(url)
    response = connect_endpoint(config, url)
    if not response:
        return ""
    response_json = response.json()
    if gene:
        response_json["query_settings"]["gene_allowlist"] = [gene]
    elif region:
        response_json["query_settings"]["genomic_region"] = region
    url = query_endpoint.format(case_uuid=case_uuid)
    response = connect_endpoint(config, url, data=response_json)
    if not response:
        return ""
    response_json = response.json()
    if not response_json.get("sodar_uuid"):
        console.log(f"[bold red]Error: got no query uuid for case {case_uuid}[/bold red]")
        return ""
    time.sleep(SLEEP_RUN)
    return response_json["sodar_uuid"]


def poll_query(config, query_endpoint, query_uuid):
    url = query_endpoint.format(query_uuid=query_uuid)
    response = connect_endpoint(config, url)
    if not response:
        return {"query_state": "fetching_failed"}
    return response.json()


def poll_queries(config, query_endpoint, query_uuids):
    poll_running = []
    for query_uuid, data in query_uuids.items():
        if not data["running"]:
            continue
        query_data = poll_query(config, query_endpoint, query_uuid)
        data["state"] = query_data["query_state"]
        data["logs"] = query_data["logs"]
        data["running"] = query_data["query_state"] == "running"
        poll_running.append(data["running"])
    return any(poll_running)


def get_case_list(config, project_uuid):
    url = ENDPOINT_CASE_LIST.format(project_uuid=project_uuid)
    url_appendix = ""
    case_query = {}
    while True:
        response = connect_endpoint(config, url + ("?" + (url_appendix if url_appendix else "")))
        response_json = response.json()
        for case in response_json["results"]:
            case_query[case["sodar_uuid"]] = {"name": case["name"], "query": None}
        if not response_json["next"]:
            break
        url_appendix = response_json["next"].split("?")[1]
    return case_query


def download_generate(config, query_uuid, export_format):
    if export_format == "tsv":
        url = ENDPOINT_DOWNLOAD_GENERATE_TSV.format(query_uuid=query_uuid)
    elif export_format == "vcf":
        url = ENDPOINT_DOWNLOAD_GENERATE_VCF.format(query_uuid=query_uuid)
    elif export_format == "xlsx":
        url = ENDPOINT_DOWNLOAD_GENERATE_XLSX.format(query_uuid=query_uuid)
    response = connect_endpoint(config, url)
    if not response:
        return
    return response.json()["export_job__sodar_uuid"]


def download_status(config, sodar_uuid):
    url = ENDPOINT_DOWNLOAD_STATUS.format(sodar_uuid=sodar_uuid)
    response = connect_endpoint(config, url)
    if not response:
        return {}
    return response.json()["status"]


def download_serve(config, sodar_uuid, name):
    url = ENDPOINT_DOWNLOAD_SERVE.format(sodar_uuid=sodar_uuid)
    response = connect_endpoint(config, url)
    if not response:
        return
    with open(name, "wb") as fh:
        fh.write(response.content)


@click.command()
@click.argument("project-uuid")
@click.option(
    "--export-format",
    default="xlsx",
    type=click.Choice(["tsv", "vcf", "xlsx"], case_sensitive=False),
)
@click.option("--gene", default=None, help="Gene to filter on")
@click.option("--region", default=None, help="Region to filter on")
@click.option("--quick-preset", default="default", help="Quick preset to use")
@click.option("--inheritance", default=None, help="Inheritance preset to use")
@click.option("--frequency", default=None, help="Frequency preset to use")
@click.option("--impact", default=None, help="Impact preset to use")
@click.option("--verbose", is_flag=False, help="Verbose output")
def main(
    project_uuid, export_format, gene, region, quick_preset, inheritance, frequency, impact, verbose
):
    config = read_toml()
    query_results = {}
    case_query = {}

    with console.status("[bold green]Starting ..."):
        case_query = get_case_list(config, project_uuid)
        console.log("Getting cases from project [bold green]done[/bold green]")

    tasks = [
        f"Starting query for [bold]{case_query[n]['name']}[/bold] ({n})" for n in case_query.keys()
    ]
    with console.status("[bold green]Starting queries ..."):
        for case_uuid in case_query.keys():
            rich_query_start = tasks.pop(0)
            query_uuid = run_query(
                config,
                ENDPOINT_SM_SETTINGS,
                ENDPOINT_SM_LISTCREATE_QUERY,
                case_uuid,
                gene,
                region,
                quick_preset,
                inheritance,
                frequency,
                impact,
                verbose,
            )
            if not query_uuid:
                console.log(f"{rich_query_start} [bold red]failed[/bold red]")
                continue
            query_results[query_uuid] = {
                "running": True,
                "state": "initial",
                "case_uuid": case_uuid,
                "logs": [],
            }
            case_query[case_uuid]["query"] = query_uuid
            console.log(f"{rich_query_start} [bold green]done[/bold green]")

    with console.status("[bold green]Waiting for queries to finish ..."):
        while True:
            polls_running = poll_queries(config, ENDPOINT_SM_RETRIEVE_QUERY, query_results)
            if not polls_running:
                break
            time.sleep(SLEEP_POLL)

    if verbose:
        with open("query_logs.json", "w") as fh:
            json.dump(query_results, fh, indent=1)

    download_uuids = {}
    tasks = [
        f"Starting generation of export file for [bold]{case_query[query_results[n]['case_uuid']]['name']}[/bold] ({n})"
        for n in query_results.keys()
    ]
    with console.status("[bold green]Starting generation of export files ..."):
        for query_uuid, data in query_results.items():
            if data["state"] == "done":
                download_uuid = download_generate(config, query_uuid, export_format)
                if not download_uuid:
                    console.log(f"{tasks.pop(0)} [bold red]failed[/bold red]")
                    continue
                download_uuids[download_uuid] = case_query[data["case_uuid"]]["name"]
                console.log(f"{tasks.pop(0)} [bold green]done[/bold green]")
            else:
                console.log(f"{tasks.pop(0)} [bold red]failed[/bold red]")

    downloads_running = [True] * len(download_uuids)
    tasks = [f"Downloading file for [bold]{n}[/bold]" for n in download_uuids.values()]
    with console.status("[bold green]Waiting for downloads to finish ..."):
        while any(downloads_running):
            for i, download_uuid in enumerate(download_uuids):
                downloads_running[i] = download_status(config, download_uuid) == "running"
            time.sleep(SLEEP_POLL)

    for download_uuid, name in download_uuids.items():
        download_serve(config, download_uuid, f"{name}.{export_format}")
        console.log(f"{tasks.pop(0)} [bold green]done[/bold green]")
    console.log(":heavy_check_mark: [bold green]All done[/bold green]")


if __name__ == "__main__":
    main()
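For reference, the script reads the server URL and API token from ~/.varfishrc.toml via read_toml(); a minimal sketch of the configuration it expects, shown as the dict that toml.load() would return (values are placeholders):

# Equivalent of a ~/.varfishrc.toml with a [global] section; both keys are read in connect_endpoint().
config = {
    "global": {
        "varfish_server_url": "https://varfish.example.com/",  # placeholder URL
        "varfish_api_token": "0123456789abcdef",  # placeholder token
    }
}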
4 changes: 1 addition & 3 deletions variants/file_export.py
@@ -653,8 +653,6 @@ def _write_comment_sheet(self):
            row = [
                comment.chromosome,
                comment.start,
-                comment.end,
-                comment.bin,
                comment.reference,
                comment.alternative,
                comment.date_created,
@@ -663,7 +661,7 @@
            ]
            if self.project_or_cohort:
                row.insert(0, case.name)
-            self.variant_sheet.write_row(offset, 0, row)
+            self.comment_sheet.write_row(offset, 0, row)
            offset += 1

    def _write_metadata_sheet(self):
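For context, the corrected call writes comment rows to the comment sheet instead of the variant sheet; write_row(row, col, data) writes one row starting at the given cell. A minimal standalone sketch, assuming the sheets are xlsxwriter worksheets (the sheet name and sample rows here are made up):

import xlsxwriter

workbook = xlsxwriter.Workbook("example.xlsx")
comment_sheet = workbook.add_worksheet("Comments")
offset = 0
for row in (["chr1", 12345, "A", "G"], ["chr2", 67890, "C", "T"]):
    comment_sheet.write_row(offset, 0, row)  # one comment per row, starting at column 0
    offset += 1
workbook.close()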
2 changes: 1 addition & 1 deletion variants/serializers/__init__.py
@@ -81,7 +81,7 @@ def _check_gene_list_found(gene_list, label):
    # Validate HPO term list.
    if "prio_hpo_terms" in query_settings:
        missing = []
-        for term in query_settings["prio_hpo_terms"]:
+        for term in query_settings["prio_hpo_terms"] or []:
            if term.startswith("HP"):
                if not HpoName.objects.filter(hpo_id=term).exists():
                    missing.append(term)
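The added `or []` guards against prio_hpo_terms being present but explicitly null in the query settings; a minimal illustration of the difference:

prio_hpo_terms = None  # key present, value null
for term in prio_hpo_terms or []:  # without `or []`, iterating None raises TypeError
    print(term)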
20 changes: 9 additions & 11 deletions variants/utils.py
@@ -285,17 +285,15 @@ def _perform_create(obj):
    for result_row in result_rows:
        duplicates.append(
            {
-                {
-                    "case_uuid": str(case.sodar_uuid),
-                    "case_name": case.name,
-                    "project": case.project.full_title,
-                    "chromosome": result_row.chromosome,
-                    "start": result_row.start,
-                    "end": result_row.end,
-                    "json": json.dumps(
-                        model_to_dict(result_row, exclude=("id",)), cls=UUIDEncoder
-                    ),
-                }
+                "case_uuid": str(case.sodar_uuid),
+                "case_name": case.name,
+                "project": case.project.full_title,
+                "chromosome": result_row.chromosome,
+                "start": result_row.start,
+                "end": result_row.end,
+                "json": json.dumps(
+                    model_to_dict(result_row, exclude=("id",)), cls=UUIDEncoder
+                ),
            }
        )

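The removed doubled braces had turned the appended value into a set literal wrapping the dict, which fails because dicts are unhashable; a minimal reproduction and the corrected form:

# Before the fix: the outer braces form a set literal around the dict.
try:
    entry = {{"case_uuid": "abc", "start": 1}}
except TypeError as exc:
    print(exc)  # unhashable type: 'dict'
# After the fix: a plain dict is appended.
entry = {"case_uuid": "abc", "start": 1}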
6 changes: 6 additions & 0 deletions variants/views/api/__init__.py
@@ -845,11 +845,17 @@ def get(self, request, *args, **kwargs):
        if self.request.get_full_path() == reverse(
            "variants:ajax-query-case-download-generate-tsv",
            kwargs={"smallvariantquery": query.sodar_uuid},
+        ) or self.request.get_full_path() == reverse(
+            "variants:api-query-case-download-generate-tsv",
+            kwargs={"smallvariantquery": query.sodar_uuid},
        ):
            file_type = "tsv"
        elif self.request.get_full_path() == reverse(
            "variants:ajax-query-case-download-generate-vcf",
            kwargs={"smallvariantquery": query.sodar_uuid},
+        ) or self.request.get_full_path() == reverse(
+            "variants:api-query-case-download-generate-vcf",
+            kwargs={"smallvariantquery": query.sodar_uuid},
        ):
            file_type = "vcf"
        else:
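For context, the added `or` branches make the API route names resolve to the same file type as their AJAX counterparts. A standalone sketch of the matching idea (route names are taken from the diff; the detect_file_type helper and its fallback are hypothetical, since the view's else branch is cut off above):

from django.urls import reverse

def detect_file_type(path, query_uuid):
    routes = {
        "tsv": ("variants:ajax-query-case-download-generate-tsv", "variants:api-query-case-download-generate-tsv"),
        "vcf": ("variants:ajax-query-case-download-generate-vcf", "variants:api-query-case-download-generate-vcf"),
    }
    for file_type, names in routes.items():
        if any(path == reverse(name, kwargs={"smallvariantquery": query_uuid}) for name in names):
            return file_type
    return None  # the original else branch (not shown) handles the remaining case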