|  | 
|  | 1 | +# --- | 
|  | 2 | +# jupyter: | 
|  | 3 | +#   jupytext: | 
|  | 4 | +#     text_representation: | 
|  | 5 | +#       extension: .py | 
|  | 6 | +#       format_name: percent | 
|  | 7 | +#       format_version: '1.3' | 
|  | 8 | +#       jupytext_version: 1.17.2 | 
|  | 9 | +#   kernelspec: | 
|  | 10 | +#     display_name: vuecore-dev | 
|  | 11 | +#     language: python | 
|  | 12 | +#     name: python3 | 
|  | 13 | +# --- | 
|  | 14 | + | 
|  | 15 | +# %% [markdown] | 
|  | 16 | +# # Bar Plot | 
|  | 17 | +# | 
|  | 18 | +# ![VueCore logo][vuecore_logo] | 
|  | 19 | +# | 
|  | 20 | +# [![Open In Colab][colab_badge]][colab_link] | 
|  | 21 | +# | 
|  | 22 | +# [VueCore][vuecore_repo] is a Python package for creating interactive and static visualizations of multi-omics data. | 
|  | 23 | +# It is part of a broader ecosystem of tools—including [ACore][acore_repo] for data processing and [VueGen][vuegen_repo] for automated reporting—that together enable end-to-end workflows for omics analysis. | 
|  | 24 | +# | 
|  | 25 | +# This notebook demonstrates how to generate bar plots using plotting functions from VueCore. We showcase basic and advanced plot configurations, highlighting key customization options such as grouping, color mapping, text annotations, and export to multiple file formats. | 
|  | 26 | +# | 
|  | 27 | +# ## Notebook structure | 
|  | 28 | +# | 
|  | 29 | +# First, we will set up the work environment by installing the necessary packages and importing the required libraries. Next, we will create basic and advanced bar plots. | 
|  | 30 | +# | 
|  | 31 | +# 0. [Work environment setup](#0-work-environment-setup) | 
|  | 32 | +# 1. [Basic bar plot](#1-basic-bar-plot) | 
|  | 33 | +# 2. [Advanced bar plot](#2-advanced-bar-plot) | 
|  | 34 | +# | 
|  | 35 | +# ## Credits and Contributors | 
|  | 36 | +# - This notebook was created by Sebastián Ayala-Ruano under the supervision of Henry Webel and Alberto Santos, head of the [Multiomics Network Analytics Group (MoNA)][Mona] at the [Novo Nordisk Foundation Center for Biosustainability (DTU Biosustain)][Biosustain]. | 
|  | 37 | +# - You can find more details about the project in this [GitHub repository][vuecore_repo]. | 
|  | 38 | +# | 
|  | 39 | +# [colab_badge]: https://colab.research.google.com/assets/colab-badge.svg | 
|  | 40 | +# [colab_link]: https://colab.research.google.com/github/Multiomics-Analytics-Group/vuecore/blob/main/docs/api_examples/bar_plot.ipynb | 
|  | 41 | +# [vuecore_logo]: https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuecore/main/docs/images/logo/vuecore_logo.svg | 
|  | 42 | +# [Mona]: https://multiomics-analytics-group.github.io/ | 
|  | 43 | +# [Biosustain]: https://www.biosustain.dtu.dk/ | 
|  | 44 | +# [vuecore_repo]: https://github.com/Multiomics-Analytics-Group/vuecore | 
|  | 45 | +# [vuegen_repo]: https://github.com/Multiomics-Analytics-Group/vuegen | 
|  | 46 | +# [acore_repo]: https://github.com/Multiomics-Analytics-Group/acore | 
|  | 47 | + | 
|  | 48 | +# %% [markdown] | 
|  | 49 | +# ## 0. Work environment setup | 
|  | 50 | + | 
|  | 51 | +# %% [markdown] | 
|  | 52 | +# ### 0.1. Installing libraries and creating global variables for platform and working directory | 
|  | 53 | +# | 
|  | 54 | +# To run this notebook locally, you should create a virtual environment with the required libraries. If you are running this notebook on Google Colab, everything should be set. | 
|  | 55 | + | 
|  | 56 | +# %% tags=["hide-output"] | 
|  | 57 | +# VueCore library | 
|  | 58 | +# %pip install vuecore | 
|  | 59 | + | 
|  | 60 | +# %% tags=["hide-cell"] | 
|  | 61 | +import os | 
|  | 62 | + | 
# True when the COLAB_GPU environment variable is defined.
# NOTE(review): presumably exported by every Google Colab runtime — confirm.
IN_COLAB = os.environ.get("COLAB_GPU") is not None

# %% tags=["hide-cell"]
# Folder used for exported figures; creating it is a no-op if it exists
output_dir = "./outputs"
os.makedirs(output_dir, exist_ok=True)
|  | 69 | + | 
|  | 70 | +# %% [markdown] | 
|  | 71 | +# ### 0.2. Importing libraries | 
|  | 72 | + | 
|  | 73 | +# %% | 
|  | 74 | +# Imports | 
|  | 75 | +import pandas as pd | 
|  | 76 | +import numpy as np | 
|  | 77 | +from pathlib import Path | 
|  | 78 | + | 
|  | 79 | +from vuecore.plots.basic.bar import create_bar_plot | 
|  | 80 | + | 
|  | 81 | +# %% [markdown] | 
|  | 82 | +# ### 0.3. Create sample data | 
|  | 83 | +# We create a synthetic dataset representing the relative abundances of common bacterial genera across various environmental samples. | 
|  | 84 | + | 
|  | 85 | +# %% | 
# Seed NumPy's global random state so the synthetic data is reproducible
np.random.seed(42)

# Environments the synthetic samples are labelled with
sample_types = ["Soil", "Freshwater", "Ocean", "Sediment", "Wastewater"]

# Pool of bacterial genera from which each sample draws a random subset
genera = [
    "Pseudomonas", "Bacillus", "Escherichia", "Streptococcus",
    "Lactobacillus", "Bacteroides", "Clostridium", "Staphylococcus",
    "Enterobacter", "Klebsiella", "Salmonella", "Shigella",
    "Vibrio",
]
|  | 106 | + | 
|  | 107 | + | 
def make_sample(sample: str, genera: list[str]) -> list[dict]:
    """
    Generate synthetic microbial abundance data for a single sample.

    A random subset of ``genera`` is drawn without replacement and each
    selected genus receives a random relative abundance; the abundances
    are normalized so that they sum to 100.

    Parameters
    ----------
    sample : str
        The sample type (e.g., 'Soil', 'Ocean', etc).
    genera : list[str]
        List of all possible bacterial genera.

    Returns
    -------
    list[dict]
        One dictionary per selected genus with the keys ``Sample``,
        ``Genus``, ``Relative_Abundance`` and ``Genera_Count``. The list
        is empty when ``genera`` is empty.

    Notes
    -----
    Random numbers are drawn from NumPy's global random state, so the
    output is reproducible only after a call to ``np.random.seed``.
    """
    n_available = len(genera)
    if n_available == 0:
        # Nothing to sample from
        return []

    # At least 5 genera are normally present; clamp the lower bound for
    # short pools, because randint(5, n + 1) raises ValueError when n < 5
    min_count = min(5, n_available)
    genera_count = int(np.random.randint(min_count, n_available + 1))

    # Randomly pick the subset of genera present in this sample
    selected = np.random.choice(genera, genera_count, replace=False)

    # Random raw abundances, shifted by +0.1 so no genus gets a zero share
    raw = np.random.rand(genera_count) + 0.1

    # Normalize abundances so they sum to 100%
    abundances = (raw / raw.sum()) * 100

    # tolist() converts NumPy scalars into plain str / float values
    return [
        {
            "Sample": sample,
            "Genus": genus,
            "Relative_Abundance": abund,
            "Genera_Count": genera_count,
        }
        for genus, abund in zip(selected.tolist(), abundances.tolist())
    ]
|  | 149 | + | 
|  | 150 | + | 
# Assemble the full dataset: one row per (sample type, selected genus) pair
abund_df = pd.DataFrame(
    [
        record
        for sample_type in sample_types
        for record in make_sample(sample_type, genera)
    ]
)
abund_df.head()
|  | 156 | + | 
|  | 157 | +# %% [markdown] | 
|  | 158 | +# ## 1. Basic Bar Plot | 
|  | 159 | +# A basic bar plot can be created by simply providing the `x` and `y` columns from the DataFrame, along with style options like `title`. | 
|  | 160 | + | 
|  | 161 | +# %% | 
# One row per sample: keep the first occurrence of each sample together
# with its genera count
bar_plot_basic_df = abund_df[["Sample", "Genera_Count"]].drop_duplicates(
    subset="Sample"
)

# Location of the PNG file for the basic plot
file_path_basic_png = Path(output_dir, "bar_plot_basic.png")

# Build the basic bar plot from the x / y columns and a title
bar_plot_basic = create_bar_plot(
    data=bar_plot_basic_df,
    x="Sample",
    y="Genera_Count",
    title="Genera Count by Sample Type",
    file_path=file_path_basic_png,
)

bar_plot_basic.show()
|  | 180 | + | 
|  | 181 | +# %% [markdown] | 
|  | 182 | +# ## 2. Advanced Bar Plot | 
|  | 183 | +# Here is an example of an advanced stacked bar plot with more descriptive parameters, including color grouping, custom axis labels, hover tooltips, and export to HTML. | 
|  | 184 | + | 
|  | 185 | +# %% | 
# Location of the HTML file for the advanced plot
file_path_adv_html = Path(output_dir, "bar_plot_advanced.html")

# Stacked bar plot: one bar per sample, split and colored by genus
bar_plot_adv = create_bar_plot(
    data=abund_df,
    x="Sample",
    y="Relative_Abundance",
    color="Genus",
    barmode="stack",
    title="Taxonomic Profile of Environmental Samples",
    subtitle="Relative Abundance of Bacterial Genera",
    # Human-readable names shown in place of the raw column names
    labels={
        "Sample": "Environmental Sample Type",
        "Relative_Abundance": "Relative Abundance (%)",
        "Genus": "Genus",
    },
    hover_name="Genus",
    hover_data=["Relative_Abundance"],
    opacity=0.9,
    file_path=file_path_adv_html,
)

bar_plot_adv.show()
0 commit comments