def VariantMap(
    dataframe,
    entries_per_batch=2500,
    batch_no=1,
    annotation=None,
    filter_sample=None,
    filter_file=None,
    sample_order=None,
    title="",
    sample_names=None,
    color_list=None,
    colorbar_thick=25,
    rangeslider=True,
    height=500,
    width=600,
):
    """Returns a Dash Bio VariantMap figure.

Keyword arguments:

- dataframe (dataframe; required): A pandas dataframe generated by VariantBreak.
    Please pre-process your VCF files with VariantBreak and load the output object here.
    The dataframe must carry a `metadata` attribute whose "sample_names"
    entry lists the default sample labels (read when sample_order is None).
- entries_per_batch (number; default 2500): Number of SV entries to display
    in a batch.
- batch_no (number; default 1): Batch number to display in the plot.
    SVs are grouped by batches and the batches are labeled numerically and
    chronologically with descending SV prevalence. Only a single batch is
    allowed to be displayed in an instance, unless a slider is used in an app
    to switch between each batch. Number of total batches = total number of
    SV entries / entries_per_batch, rounded up.
- annotation (dict; optional): A dictionary where the keys are annotation
    labels and the values are list of respective annotations. Only SVs with
    the selected annotations will be displayed in the plot. The keys are:
    'Gene_id', 'Transcript_id', 'Gene_name', 'Gene_type' and 'Gene_feature'
    for GTF/GFF. For BED annotation files, the key will be their 4th column
    label if present, or else they will be 'BED1', 'BED2' and so on. Please
    refer to the legend.txt file. The special key 'index_list' selects SVs
    by their dataframe index labels.
- filter_sample (list; optional): The list of default sample names
    (e.g. 'S1', 'S2') to be removed from the plot together with the SVs they
    possessed. For example, a non-diseased sample can be selected by this
    argument to omit non-diseased associated SVs in the remaining diseased sample.
- filter_file (list; optional): The list of default filter names
    (e.g. 'Filter1', 'Filter2') for filter activation. SVs that overlapped with
    the respective filter BED files will be excluded from the plot.
- sample_order (list; optional): The list of default sample names
    (e.g. 'S1', 'S2') with the order intended for plotting. Samples can also be
    omitted from the plot using this argument.
- title (string; optional): Title of plot.
- sample_names (dict; optional): If provided, sample labels will follow this
    dict rather than the default labels (e.g. 'S1', 'S2') extracted from the
    VariantBreak object. The keys should be: 'S1', 'S2', 'S3' and so on,
    depending on how many samples you have.
- color_list (dict; optional): The dict of colors to use for different SV classes.
    The keys are: 'DEL' (deletion), 'INV' (inversion), 'INS' (insertion),
    'BND' (translocation or transposition), 'DUP' (tandem duplication), 'UKN' (unknown),
    'NIL' (SV not detected). Classes missing from the dict keep their default color.
- colorbar_thick (number; default 25): The thickness of the colorbar, in px.
- rangeslider (bool; default True): Whether or not to show the range slider.
- height (number; default 500): The height of the graph, in px.
- width (number; default 600): The width of the graph, in px.


Usage example:

import pandas as pd
import dash_bio

# Load dataframe and metadata
file_path = "/path/to/sample.h5"
with pd.HDFStore(file_path, mode="r") as store:
    df = store['dataset']
    metadata = store.get_storer('dataset').attrs.metadata

# Add metadata to dataframe
df.metadata = ''
df.metadata = metadata

# Plot VariantMap
fig = dash_bio.VariantMap(df)

    """

    # Get labels of samples to display
    if sample_order is None:
        # All samples to be displayed and default order
        samples = dataframe.metadata["sample_names"]
    else:
        samples = sample_order

    # Order matters: it must match the tick labels of the colorbar
    sv_classes = ["NIL", "DEL", "INV", "INS", "BND", "DUP", "UKN"]

    color_dict = {
        "DEL": "#4daf4a",
        "INV": "#377eb8",
        "INS": "#e41a1c",
        "BND": "#984ea3",
        "DUP": "#ff7f00",
        "UKN": "#000000",
        "NIL": "#d1d9e0",
    }

    # Generate color list for colorbar; user colors override the defaults
    # class by class
    overrides = color_list or {}
    colors = [overrides.get(_class, color_dict[_class]) for _class in sv_classes]

    vm = _VariantMap(
        dataframe,
        entries_per_batch,
        batch_no,
        annotation,
        filter_sample,
        filter_file,
        title,
        samples,
        sample_names,
        colors,
        colorbar_thick,
        rangeslider,
        height,
        width,
    )

    return vm.figure()


def _escaped_alternation(terms, suffix=""):
    """Build a regex that matches any of *terms* literally.

    Each term is escaped so that characters such as "." or "(" in a gene or
    annotation name are not interpreted as regex syntax; *suffix* is appended
    to every term before the alternatives are joined with "|".
    """
    import re  # local import: the module header does not import re

    return "|".join(re.escape(term) + suffix for term in terms)


class _VariantMap:

    """Returns a Dash Bio VariantMap object.

Methods:

- figure: Returns a VariantMap plotly graph object.
    """

    def __init__(
        self,
        df,
        entries_per_batch,
        batch_no_for_display,
        annotation,
        filter_sample,
        filter_file,
        title,
        samples,
        sample_names,
        colors,
        colorbar_thick,
        rangeslider,
        height,
        width,
    ):
        """Filter *df*, pick one batch of variants and prepare heatmap data.

        After construction the instance exposes `z` (one row of values per
        sample), `hover` (matching hover texts), `names` (row labels) and
        `batch_size`; all three row lists are in reversed sample order.

        Raises ValueError if entries_per_batch is smaller than 1.
        """
        self.title = title
        self.colorbar_thick = colorbar_thick
        self.rangeslider = rangeslider
        self.height = height
        self.width = width

        # Generating discrete colorscale
        markers = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4]
        self.dcolorsc = discrete_colorscale(markers, colors)
        self.tickvals = [0.071, 0.214, 0.357, 0.500, 0.643, 0.786, 0.929]
        self.ticktext = ["NIL", "DEL", "INV", "INS", "BND", "DUP", "UKN"]

        # Subset dataframe by gene name, SV index and other annotations
        if annotation:
            df = self._subset_by_annotation(df, annotation)

        # Subset dataframe by sample filter: keep SVs absent (0.0) from
        # every filtered sample
        if filter_sample:
            for sample in filter_sample:
                df = df[df[sample] == 0.0]

        # Subset dataframe by filter file
        if filter_file:
            for _filter in filter_file:
                df = df[df[_filter] != "1"]

        # Make a copy of dataframe
        df_new = df.copy()

        # Get actual sample order list
        sample_order = [x for x in samples if x in df_new.columns]

        if entries_per_batch < 1:
            raise ValueError("entries_per_batch must be at least 1")

        # Number of batches = entries / entries_per_batch, rounded up
        # (at least one so that an empty dataframe does not divide by zero)
        n_rows = len(df_new)
        div = max(math.ceil(n_rows / entries_per_batch), 1)

        # Spread the rows evenly over the batches
        self.batch_size = max(math.ceil(n_rows / div), 1)

        # Add 1-based batch number to dataframe
        df_new["Group"] = np.arange(n_rows) // self.batch_size + 1

        # Subset dataframe by batch label
        df_new = df_new[df_new["Group"] == int(batch_no_for_display)]

        # Transpose dataframe so that samples become rows
        df_new = df_new.T

        # Subset sample rows from dataframe and convert to list of lists
        z = df_new.loc[sample_order, :].values.tolist()

        # Reverse list
        self.z = z[::-1]

        # Subset hover-text row from dataframe and convert to list of lists
        hover_list = ["Hover_" + x for x in sample_order]
        hover_text = df_new.loc[hover_list, :].values.tolist()

        # Reverse list
        self.hover = hover_text[::-1]

        # Change sample labels if provided; unknown samples keep their
        # default label
        if sample_names is None:
            names = sample_order
        else:
            names = [sample_names.get(name, name) for name in sample_order]

        # Reverse sample name list
        names.reverse()
        self.names = names

    @staticmethod
    def _subset_by_annotation(df, annotation):
        """Return the rows of *df* selected by the *annotation* dict.

        "Gene_name" and "index_list" are combined as a union when both are
        given; every other non-empty key narrows the result further.
        """
        genes = annotation.get("Gene_name")
        indexes = annotation.get("index_list")

        if genes:
            # Gene names are matched together with their trailing ";"
            gene_rows = df[
                df["Gene_name"].str.contains(
                    _escaped_alternation(genes, ";"), na=False
                )
            ]

        if genes and indexes:
            # NOTE(review): an SV matched by both criteria appears twice in
            # the result, as in the previous implementation - confirm intent
            df = pd.concat([gene_rows.copy(), df.loc[indexes, :].copy()])
        elif genes:
            df = gene_rows
        elif indexes:
            df = df.loc[indexes, :]

        # Subset dataframe by the remaining annotations
        for _key, terms in annotation.items():
            if terms and _key not in ("Gene_name", "index_list"):
                df = df[
                    df[_key].str.contains(_escaped_alternation(terms), na=False)
                ]

        return df

    def figure(self):
        """
        :return: a go.Figure object
        """
        trace1 = go.Heatmap(
            z=self.z,
            y=self.names,
            colorscale=self.dcolorsc,
            colorbar=dict(
                title=dict(
                    text="SV classes",
                    font=dict(family="Open Sans", size=14, color="#ffffff"),
                ),
                thickness=self.colorbar_thick,
                tickvals=self.tickvals,
                ticktext=self.ticktext,
                tickfont=dict(family="Open Sans", size=14, color="#ffffff"),
            ),
            zmin=0.0,
            zmax=1.0,
            hovertext=self.hover,
            hoverinfo="text",
            xgap=2,
            ygap=2,
        )

        layout = go.Layout(
            title=dict(
                # NOTE(review): the empty-string operands look like markup
                # lost in extraction (possibly bold tags) - confirm against
                # the repository before changing
                text="" + self.title + "",
                font=dict(family="Open Sans", size=18, color="#ffffff"),
                x=0.48,
            ),
            xaxis=dict(
                title=dict(
                    text="Variants",
                    font=dict(family="Open Sans", size=16, color="#ffffff"),
                    standoff=3,
                ),
                rangeslider=dict(visible=self.rangeslider),
                showticklabels=False,
                side="top",
                type="-",
            ),
            yaxis=dict(
                title=dict(
                    text="Samples",
                    font=dict(family="Open Sans", size=16, color="#ffffff"),
                    standoff=3,
                ),
                tickfont=dict(family="Open Sans", size=14, color="#ffffff"),
            ),
            height=self.height,
            width=self.width,
            paper_bgcolor="rgba(10,43,77,255)",
            plot_bgcolor="rgba(255,255,255,255)",
        )

        return go.Figure(data=[trace1], layout=layout)


def discrete_colorscale(markers, colors):
    """Build a stepwise colorscale from boundary markers and colors.

    :param markers: boundary values; sorted and rescaled to the 0-1 range.
        At least ``len(colors) + 1`` distinct-range values are required.
    :param colors: one color per interval between consecutive markers
    :return: color scale as a list of ``[position, color]`` pairs, two per
        color (interval start and interval end)
    :raises ValueError: if there are too few markers or they span no range
    """
    markers = sorted(markers)
    if len(markers) < len(colors) + 1:
        raise ValueError("discrete_colorscale needs len(colors) + 1 markers")

    low = markers[0]
    span = markers[-1] - low
    if span == 0:
        raise ValueError("discrete_colorscale markers must span a non-zero range")

    norm_mark = [round((v - low) / span, 3) for v in markers]

    dcolorscale = []
    # Each color fills the interval between two consecutive markers
    for color, start, stop in zip(colors, norm_mark, norm_mark[1:]):
        dcolorscale.extend([[start, color], [stop, color]])
    return dcolorscale
+ +NOTE: This app may not be able to handle large input files (>32M) in Google Chrome. + Do try with Mozilla Firefox for these larger files. + +Author: CY THAM + +Version: 1.0.0 +""" + +import os +import base64 +import io + +import math +import pandas as pd +import dash_bio +import dash_html_components as html +import dash_core_components as dcc +from dash.dependencies import Input, Output, State +from dash.exceptions import PreventUpdate + + +try: + from layout_helper import run_standalone_app +except ModuleNotFoundError: + from .layout_helper import run_standalone_app + + +DATAPATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") + + +def description(): + return "Variant Map visualizes cohort structural variants in a heatmap." + + +def header_colors(): + return {"bg_color": "#1f6335", "font_color": "#ffffff", "light_logo": True} + + +main_desc = ( + "VariantMap is a genomic structural variant (SV) " + "visualization technique that displays variants across " + "multiple samples in a single heatmap. Each row represents " + "a sample and each column represents an SV breakend in the " + "sample cohort. The colors indicate the class of an SV " + "present in a sample. The heatmap can be customized " + "interactively to suit your analysis by changing various " + 'components in the "Customize" tab.' +) + + +data_desc = ( + "VariantMap requires a dataframe object that is generated " + "by VariantBreak. Do note that only NanoVar VCF " + "files are currently compatible to work with VariantBreak " + "in creating the dataframe." 
+) + + +def layout(): + return html.Div( + id="variantmap-body", + className="app-body", + children=[ + html.Div( + id="variantmap-control-tabs", + className="control-tabs", + children=[ + dcc.Tabs( + id="variantmap-tabs", + value="what-is", + children=[ + # "What is" tab + dcc.Tab( + label="About", + value="what-is", + children=html.Div( + className="control-tab", + children=[ + html.H4( + className="what-is", + children="What is Variant Map?", + ), + html.P(main_desc), + html.P(data_desc), + ], + ), + ), + # Data tab + dcc.Tab( + label="Data", + value="data", + children=html.Div( + className="control-tab", + children=[ + # Dataset upload + html.Div( + "Upload dataset:", + title="Upload your own dataset below.", + className="app-controls-name", + ), + html.Div( + id="variantmap-file-upload", + title=( + "Upload your own VariantBreak " + "generated HDF5 dataset here." + ), + children=[ + dcc.Upload( + id="upload-data", + className="control-upload", + children=html.Div([ + "Drag and drop a .h5 file or ", + html.A("select a file."), + ]), + accept=".hdf5,.h5", + multiple=False, + ) + ], + ), + html.Br(), + # Label file upload + html.Div( + "Upload label file:", + title=( + "This file is used to rename and " + "sort samples.\n" + "Example:\n" + "#Default nameLabel\n" + "S1SampleA\n" + "S3SampleC\n" + "S2SampleB" + ), + className="app-controls-name", + ), + html.Div( + id="variantmap-tsv-upload", + title=( + "Upload a .tsv file to rename and " + "sort samples.\n" + "Example:\n" + "#Default nameLabel\n" + "S1SampleA\n" + "S3SampleC\n" + "S2SampleB" + ), + children=[ + dcc.Upload( + id="upload-tsv", + className="control-upload", + children=html.Div([ + "Drag and drop a .tsv file or ", + html.A("select file."), + ]), + accept=".txt,.tsv,.csv", + multiple=False, + ) + ], + ), + html.Br(), + # Sample selection check boxes + html.Div( + id="output-data-info", + className="fullwidth-app-controls-name", + children=[ + dcc.Checklist( + id="select-samples", + style={"display": 
"none"}, + ), + html.Br(), + html.Button( + id="submit-button-samples", + style={"display": "none"}, + ), + ], + ), + ], + ), + ), + # Customize tab + dcc.Tab( + label="Customize", + value="customize", + children=html.Div( + className="control-tab", + children=[ + html.Div( + id="customize-tab", + className="fullwidth-app-controls-name", + children=[ + dcc.Dropdown( + id="sample_filt", + style={"display": "none"}, + ), + dcc.Dropdown( + id="file_filt", + style={"display": "none"}, + ), + dcc.Dropdown( + id="gene_names", + style={"display": "none"}, + ), + dcc.Input( + id="input_index", + style={"display": "none"}, + ), + dcc.Checklist( + id="select-genetype", + style={"display": "none"}, + ), + dcc.Checklist( + id="select-feature", + style={"display": "none"}, + ), + dcc.Checklist( + id="select-annotation", + style={"display": "none"}, + ), + dcc.Input( + id="entries_size", + style={"display": "none"}, + ), + html.Button( + id="submit-button", + style={"display": "none"}, + ), + ], + ) + ], + ), + ), + # Variant info tab + dcc.Tab( + label="Variant info", + value="info", + children=html.Div( + className="control-tab", + children=[ + html.Div( + id="info-tab", + className="fullwidth-app-controls-name", + children=[ + html.Div( + "Click on variant to display " + "its information" + ) + ], + ) + ], + ), + ), + ], + ) + ], + ), + dcc.Loading( + className="dashbio-loading", + children=html.Div( + id="variantmap-wrapper", + children=[ + # Error message box + html.Div( + id="error-msg", + style={ + "color": "crimson", + "text-align": "center", + "font-size": "18px", + }, + ), + # Plot VariantMap figure + html.Div( + id="variantmap-fig", + children=[ + html.Div( + dcc.Graph(id="variantmap"), + style={"display": "none"}, + ) + ], + ), + # Plot Slider + html.Div( + id="batch-slider", + children=[ + html.Div("", style={"textAlign": "center"}), + html.Div( + dcc.Slider(id="slider",), style={"display": "none"} + ), + ], + ), + ], + ), + ), + # Create Store component to store 
JSON of dataframe and metadata + dcc.Store(id="memory"), + # To store variant counts + dcc.Store(id="count-store"), + # To store custom settings + dcc.Store(id="custom-store"), + # To store name dictionary + dcc.Store(id="name_dict"), + # To store sample labels + dcc.Store(id="sample_labels"), + # To store sample order + dcc.Store(id="sample_order"), + ], + ) + + +def callbacks(app): + + # Callback upon uploading of dataset + @app.callback( + [ + Output("output-data-info", "children"), + Output("count-store", "data"), + Output("memory", "data"), + Output("customize-tab", "children"), + Output("name_dict", "data"), + ], + [Input("upload-data", "contents"), Input("upload-data", "filename")], + ) + def read_data(contents, filename): + # print("call - read_data") + if filename: + content_type, content_string = contents.split(",") + + # Decode base64 + decoded = base64.b64decode(content_string) + + # Load input hdf5 file into a pandas dataframe and extract metadata + with pd.HDFStore( + "data.h5", + mode="r", + driver="H5FD_CORE", + driver_core_backing_store=0, + driver_core_image=io.BytesIO(decoded).read(), + ) as store: + df = store["dataset"] + metadata = store.get_storer("dataset").attrs.metadata + + else: + # Load sample hdf5 into a pandas dataframe and extract metadata + filename = "sample_data.h5" + with pd.HDFStore(os.path.join(DATAPATH, filename), mode="r") as store: + df = store["dataset"] + metadata = store.get_storer("dataset").attrs.metadata + + # Sample_info children + child_sample = [ + html.Div("Input file: {}".format(filename)), + html.Br(), + html.Div('Select samples to display and click "SUBMIT":'), + dcc.Checklist( + id="select-samples", + options=[ + {"label": name, "value": name} for name in metadata["sample_names"] + ], + value=[name for name in metadata["sample_names"]], + ), + html.Br(), + html.Button( + id="submit-button-samples", className="customButton", children="SUBMIT" + ), + ] + + # Count store count + row_counts = df.shape[0] + + # 
Memory data + datasets = { + "df": df.to_json(orient="split", date_format="iso"), + "metadata": metadata, + } + + # Set-up customize tab + # Get all unique gene names + total_genes = set() + if "Gene_name" in df.columns: + for genes in df.Gene_name: + if genes != "": + for gene in genes.split("/"): + total_genes.add(gene.strip().rstrip(";")) + total_genes = sorted(total_genes) + + # Get all unique gene types + gene_types = set() + if "Gene_type" in df.columns: + for t in df.Gene_type: + if t != "": + for _t in t.split(","): + gene_types.add(_t.strip()) + + main_names = ["protein_coding", "lncRNA", "miRNA", "snRNA", "snoRNA"] + others = sorted(gene_types.difference(main_names)) + + # Get labels of non-GTF annotation columns + bed_annote = [] + for name in metadata["annotation"]: + if name != "GTF": + bed_annote.append(name) + + # Get all unique annotation names + annotes = set() + for bed in bed_annote: + for annote in df.loc[:, bed]: + if annote != "": + for _annote in annote.split("/"): + annotes.add(_annote.strip()) + annotes = sorted(annotes) + annote_dict = {annote: annote for annote in annotes} + + # Create name dictionary + name_dict = { + "Promoter": "promoter", + "Exon": "exon", + "Intron": "intron", + "Protein coding": "protein_coding", + "lncRNA": "lncRNA", + "miRNA": "miRNA", + "snRNA": "snRNA", + "snoRNA": "snoRNA", + "Others": others, + } + + name_dict.update(annote_dict) + + # Define fixed gene types + main_types = ["Protein coding", "lncRNA", "miRNA", "snRNA", "snoRNA", "Others"] + + # Define fixed gene features + features = ["Promoter", "Exon", "Intron"] + + # Customize tab children + child_customize = [ + html.Div( + "Customize the heatmap by adjusting the components below and " + 'click "SUBMIT" at the end after finalizing your settings. ' + "Hover over each section header for more information." 
+ ), + html.Br(), + html.Div( + "Filter by variant file:", + title="Hide variants that are present in these samples.", + style={"font-weight": "bold"}, + ), + dcc.Dropdown( + id="sample_filt", + options=[ + {"label": name, "value": name} for name in metadata["sample_names"] + ], + value=None, + multi=True, + placeholder="Variant files", + searchable=False, + ), + html.Br(), + html.Div( + "Filter by filter file:", + title="Hide variants that intersect with these filter files.", + style={"font-weight": "bold"}, + ), + dcc.Dropdown( + id="file_filt", + options=[{"label": name, "value": name} for name in metadata["filter"]], + value=None, + multi=True, + placeholder="Filter files", + searchable=False, + ), + html.Br(), + html.Div( + "Search variants by gene name:", + title="Select only variants annotated with these gene names.", + style={"font-weight": "bold"}, + ), + dcc.Dropdown( + id="gene_names", + options=[{"label": name, "value": name} for name in total_genes], + value=None, + multi=True, + placeholder="Search gene names", + ), + html.Br(), + html.Div( + "Search variants by index:", + title=( + 'Select only variants labeled with these indexes separated by ";"' + ), + style={"font-weight": "bold"}, + ), + dcc.Input( + id="input_index", type="text", value=None, placeholder="Search indexes" + ), + html.Br(), + html.Br(), + html.Div( + "Filter by gene type:", + title="Select only variants annotated with these gene types.", + style={"font-weight": "bold"}, + ), + dcc.Checklist( + id="select-genetype", + options=[{"label": name, "value": name} for name in main_types], + ), + html.Br(), + html.Div( + "Filter by gene feature:", + title="Select only variants annotated with these gene features.", + style={"font-weight": "bold"}, + ), + dcc.Checklist( + id="select-feature", + options=[{"label": name, "value": name} for name in features], + ), + html.Br(), + html.Div( + "Filter by other annotations:" if len(annotes) > 0 else None, + title="Select only variants annotated with 
these annotations.", + style={"font-weight": "bold"}, + ), + dcc.Checklist( + id="select-annotation", + options=[{"label": name, "value": name} for name in annotes], + ), + html.Br(), + html.Div( + "Set section size:", + title="Set the number of variants to display per section.", + style={"font-weight": "bold"}, + ), + dcc.Input( + id="entries_size", + type="number", + value=2500, + placeholder="No. of SVs", + debounce=True, + min=100, + max=500000, + ), + html.Br(), + html.Br(), + html.Button( + id="submit-button", className="customButton", children="SUBMIT" + ), + ] + return child_sample, row_counts, datasets, child_customize, name_dict + + # Callback upon uploading of label file + @app.callback( + [Output("sample_labels", "data"), Output("sample_order", "data")], + [Input("upload-tsv", "contents")], + ) + def rename_labels(contents): + label_dict = {} + sample_order = [] + if contents: + content_type, content_string = contents.split(",") + # Decode base64 + decoded = base64.b64decode(content_string) + for line in decoded.decode("utf-8").splitlines(): + if not line.startswith("#"): + label_dict[line.split("\t")[0]] = line.split("\t")[1] + sample_order.append(line.split("\t")[0]) + return label_dict, sample_order + + # Callback upon storing customize and count data + @app.callback( + Output("batch-slider", "children"), + [Input("count-store", "data"), Input("custom-store", "data")], + ) + def make_slider(row_counts, custom_config): + # print("call - make_slider") + if row_counts is None: + raise PreventUpdate + + if custom_config is None: + entries = 2500 # Default entries number + # Calculate number of divisions of default dataframe + div = max(math.ceil(row_counts / entries), 1) + else: + entries = custom_config["entries"] + new_row_counts = custom_config["row_counts"] + div = max(math.ceil(new_row_counts / entries), 1) + + child_slider = [ + html.Div( + "Sections of %i Variants" % entries, style={"textAlign": "center"} + ), + html.Div( + dcc.Slider( + 
id="slider", + min=1, + max=div, + value=1, + marks={str(i + 1): str(i + 1) for i in range(div)}, + step=None, + ) + ), + ] + return child_slider + + # Callback upon clicking customize submit button + @app.callback( + Output("custom-store", "data"), + [Input("submit-button", "n_clicks")], + [ + State("sample_filt", "value"), + State("file_filt", "value"), + State("input_index", "value"), + State("gene_names", "value"), + State("select-genetype", "value"), + State("select-feature", "value"), + State("select-annotation", "value"), + State("entries_size", "value"), + State("memory", "data"), + State("name_dict", "data"), + ], + ) + def store_custom( + n_clicks, + sample_filt, + file_filt, + index_str, + gene_names, + gene_types, + features, + annotes, + entries, + data, + name_dict, + ): + # print("call - store_custom") + if n_clicks is None: + # print("Update - store_custom = None") + return None + + sample_list = [] + filter_list = [] + index_list = [] + annotation_dict = {} + custom_dict = {} + + if sample_filt: + for i in sample_filt: + sample_list.append(i) + + if file_filt: + for i in file_filt: + filter_list.append(i) + + if index_str: + index_list = [x.strip() for x in index_str.split(";") if x] + custom_dict["index_list"] = index_list + + annotation_dict["Gene_name"] = [] + if gene_names: + for i in gene_names: + annotation_dict["Gene_name"].append(i) + + annotation_dict["Gene_type"] = [] + if gene_types: + for i in gene_types: + annotation_dict["Gene_type"].append(i) + + annotation_dict["Gene_feature"] = [] + if features: + for i in features: + annotation_dict["Gene_feature"].append(i) + + for name in data["metadata"]["annotation"]: + if name != "GTF": + if annotes: + annotation_dict[name] = annotes + else: + annotation_dict[name] = [] + + custom_dict["entries"] = entries + + # Load dataframe from memory + df = pd.read_json(data["df"], orient="split") + + # Check if variant indexes present in dataframe + # if not df.index.isin(index_list).any(): + # raise 
HaltCallback('ERROR: Some variant indexes not found in the data.') + try: + df = df.loc[index_list, :] + except KeyError: + pass + + # Calculate row counts of new subsetted dataframe + # Subset dataframe by annotation + for col in annotation_dict: + custom_dict[col] = [] + if annotation_dict[col]: # If not blank list + try: + labels = [name_dict[x] for x in annotation_dict[col]] + except KeyError: + labels = [x for x in annotation_dict[col]] + new_labels = [] + for i in labels: + if type(i) == list: + for j in i: + new_labels.append(j) + else: + new_labels.append(i) + df = df[df[col].isin(new_labels)] + custom_dict[col] = new_labels + + # Subset dataframe by sample filter + if sample_list: # If not blank list + for sample in sample_list: + df = df[df[sample] == 0.0] + + # Subtset dataframe by filter file + if filter_list: # If not blank list + for _filter in filter_list: + df = df[df[_filter] != "1"] + + custom_dict["row_counts"] = df.shape[0] + custom_dict["filter_sample"] = sample_list + custom_dict["filter_file"] = filter_list + custom_dict["index_list"] = index_list + + return custom_dict + + # Callback upon slider selection, data submit button click + # and storing of sample label data + @app.callback( + [Output("variantmap-fig", "children"), Output("error-msg", "children")], + [ + Input("slider", "value"), + Input("submit-button-samples", "n_clicks"), + Input("sample_labels", "data"), + ], + [ + State("memory", "data"), + State("custom-store", "data"), + State("select-samples", "value"), + State("sample_order", "data"), + ], + ) + def update_figure( + selected_batch, + n_clicks, + label_dict, + data, + custom_config, + sample_list, + sample_order, + ): + # print("call - update_figure") + if selected_batch is None: + # print("PreventUpdate - update_figure") + raise PreventUpdate + + error_msg = None + + # Load dataframe from memory + df = pd.read_json(data["df"], orient="split") + + # Add metadata to dataframe + df.metadata = "" + df.metadata = 
data["metadata"] + + # Rename sample labels + if label_dict: + names_dict = label_dict + else: + names_dict = {} + + # Reorder sample_list by sample_order + sample_sortlist = [] + if sample_order: + for i in sample_order: + if i in sample_list: + sample_sortlist.append(i) + # Add remaining samples that were not in sample_order + for i in sample_list: + if i not in sample_sortlist: + sample_sortlist.append(i) + + # Create figure + if custom_config is None: # If custom_config settings are not provided + fig = dash_bio.VariantMap( + df, + batch_no=selected_batch, + sample_order=sample_sortlist, + sample_names=names_dict, + ) + + else: + # Slicing dataframe by variant indexes + annotation = {} + try: + if custom_config["index_list"]: + # Test if SV indexes exist in data + _ = df.loc[custom_config["index_list"], :] + annotation["index_list"] = custom_config["index_list"] + except KeyError: + error_msg = "ERROR: Selected variant indexes not found in data." + + # Preparing annotation filters + for name in data["metadata"]["annotation"]: + if name != "GTF": + annotation[name] = custom_config[name] + + annotation["Gene_name"] = custom_config["Gene_name"] + annotation["Gene_type"] = custom_config["Gene_type"] + annotation["Gene_feature"] = custom_config["Gene_feature"] + + # Assign VariantMap plot to fig + fig = dash_bio.VariantMap( + df, + entries_per_batch=custom_config["entries"], + batch_no=selected_batch, + annotation=annotation, + filter_sample=custom_config["filter_sample"], + filter_file=custom_config["filter_file"], + sample_order=sample_sortlist, + sample_names=names_dict, + ) + + # Children for variantmap-fig + child_fig = [ + dcc.Graph(id="variantmap", figure=fig, config={"scrollZoom": True}) + ] + return child_fig, error_msg + + # Callback upon clicking on data points on heatmap + @app.callback(Output("info-tab", "children"), [Input("variantmap", "clickData")]) + def display_click_data(clickdata): + if clickdata is None: + raise PreventUpdate + points = 
clickdata["points"][0] + hovertext = points["hovertext"] + hoverline = [] + for x in hovertext.split("
"): + if x: + hoverline.append(x) + hoverline.append(html.Br()) + else: # if blank line + hoverline.append(html.Br()) + child_info = [ + html.Div("Click on variant to display its information"), + html.Br(), + html.Div(children=hoverline), + ] + return child_info + + +# only declare app/server if the file is being run directly +if "DEMO_STANDALONE" not in os.environ: + app = run_standalone_app(layout, callbacks, header_colors, __file__) + server = app.server + +if __name__ == "__main__": + app.run_server(debug=True, port=8050) diff --git a/tests/dashbio_demos/dash-variant-map/assets/dash-variant-map.css b/tests/dashbio_demos/dash-variant-map/assets/dash-variant-map.css new file mode 100644 index 000000000..5a6d505eb --- /dev/null +++ b/tests/dashbio_demos/dash-variant-map/assets/dash-variant-map.css @@ -0,0 +1,85 @@ +#variantmap-body { + color: #ffffff !important; + background-color: #0a2b4d !important; +} +#variantmap-body ::-webkit-scrollbar { + background-color: #eff0e6 !important; +} +#variantmap-body ::-webkit-scrollbar-thumb { + background-color: #0a2b4d !important; +} +#variantmap-wrapper { + width: 600px; + display: inline-block !important; + padding: 0px; + margin: 0px; +} +#slider-wrapper { + width: 600px; + display: inline-block !important; + padding: 0px; + margin: 0px; +} +#variantmap-control-tabs { + background-color: #1f6335 !important; +} +#variantmap-control-tabs .control-tab { + background-color: #1f6335 !important; + border-color: #ffffff !important; + color: #ffffff; +} +#variantmap-control-tabs .tab { + background-color: #1f6335 !important; + border-color: #ffffff !important; + color: #ffffff; +} +#variantmap-control-tabs #gene_names, #sample_filt, #file_filt { + color: #000000; +} +#variantmap-control-tabs .tab--selected { + color: #ffffff !important; + border-top-color: #ffffff !important; +} +#variantmap-control-tabs .control-upload { + border: dotted 1px #ffffff !important; +} +.customButton { + background-color: #0a2b4d; + border-radius: 
26px; + border: 2px solid #ffffff; + display: inline-block; + cursor: pointer; + font-size: 14px; + padding: 0px 14px; + color: #ffffff; + float: left; +} +.customButton:hover { + background-color:#ffffff; +} + +.customButton:active { + position:relative; + top:2px; +} +.customButton:focus { + background-color: #0a2b4d; + color: #ffffff; +} +#app-page-content .rc-slider-handle { + border: solid 2px #e9e9e9 !important; + background-color: #1f6335; +} +.rc-slider-dot-active { + border: 1.66667px solid #e9e9e9 !important; +} +#app-page-content .rc-slider-track { + background-color: #e9e9e9 !important; +} +.rc-slider-mark-text { + color: #ffffff !important; +} +.rc-slider-mark-text-active { + color: #ffffff !important; +} + diff --git a/tests/dashbio_demos/dash-variant-map/assets/demo-image.png b/tests/dashbio_demos/dash-variant-map/assets/demo-image.png new file mode 100644 index 000000000..e74f0799e Binary files /dev/null and b/tests/dashbio_demos/dash-variant-map/assets/demo-image.png differ diff --git a/tests/dashbio_demos/dash-variant-map/data/sample_data.h5 b/tests/dashbio_demos/dash-variant-map/data/sample_data.h5 new file mode 100644 index 000000000..fe313b7f3 Binary files /dev/null and b/tests/dashbio_demos/dash-variant-map/data/sample_data.h5 differ diff --git a/tests/dashbio_demos/dash-variant-map/requirements.txt b/tests/dashbio_demos/dash-variant-map/requirements.txt new file mode 100644 index 000000000..7a94dcbc9 --- /dev/null +++ b/tests/dashbio_demos/dash-variant-map/requirements.txt @@ -0,0 +1,15 @@ +cython>=0.19 +dash>=1.6.1 +-e git://github.com/plotly/dash-bio.git#egg=dash_bio +dash-bio-utils==0.0.4 +dash-daq==0.2.2 +gunicorn==19.9.0 +jsonschema==2.6.0 +matplotlib==3.0.2 +numpy==1.15.4 +pandas>=0.24.2 +plotly>=3.5.0 +PubChemPy==1.0.4 +requests==2.21.0 +scikit-learn==0.20.2 +scipy>=1.1.0 diff --git a/tests/dashbio_demos/dash-variant-map/src/dash-bio b/tests/dashbio_demos/dash-variant-map/src/dash-bio new file mode 160000 index 000000000..320d91fe0 --- 
/dev/null +++ b/tests/dashbio_demos/dash-variant-map/src/dash-bio @@ -0,0 +1 @@ +Subproject commit 320d91fe06959bcbcc56215d4a3b144171330e0a diff --git a/tests/integration/test_variant_map.py b/tests/integration/test_variant_map.py new file mode 100644 index 000000000..ea8018f79 --- /dev/null +++ b/tests/integration/test_variant_map.py @@ -0,0 +1,29 @@ +import pandas as pd + +import dash +import dash_bio +import dash_core_components as dcc + + +# Load dataframe and metadata +_data_file = 'tests/dashbio_demos/dash-variant-map/data/sample_data.h5' +with pd.HDFStore(_data_file, mode="r") as store: + _data = store['dataset'] + metadata = store.get_storer('dataset').attrs.metadata + +# Add metadata to dataframe +_data.metadata = '' +_data.metadata = metadata + + +def test_dbvm001_basic(dash_duo): + + app = dash.Dash(__name__) + app.layout = dcc.Graph( + id="variantmap", + figure=dash_bio.VariantMap(_data) + ) + + dash_duo.start_server(app) + dash_duo.wait_for_element('#variantmap') + dash_duo.percy_snapshot('variantmap-basic') diff --git a/tests/requirements.txt b/tests/requirements.txt index 266fa7e84..c540b00e6 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,3 +2,4 @@ # Additional packages needed to run the tests. ipdb==0.11 +tables>=3.6.1