From 6c7c08d4af62785d867a4f3b0b69c1655d9c3ca3 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Thu, 21 Aug 2025 11:07:11 +0200 Subject: [PATCH 01/10] =?UTF-8?q?=E2=9C=A8=20Feat(vuecore/schemas/basic/bo?= =?UTF-8?q?x.py):=20Create=20Pydantic=20schema=20of=20the=20box=20plot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/vuecore/schemas/basic/box.py | 130 +++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 src/vuecore/schemas/basic/box.py diff --git a/src/vuecore/schemas/basic/box.py b/src/vuecore/schemas/basic/box.py new file mode 100644 index 0000000..50f9d4e --- /dev/null +++ b/src/vuecore/schemas/basic/box.py @@ -0,0 +1,130 @@ +from typing import Dict, List, Optional +from pydantic import BaseModel, Field, ConfigDict + + +class BoxConfig(BaseModel): + """ + Pydantic model for validating and managing box plot configurations. + + This model serves as a curated API for the most relevant parameters + for box plots, closely aligned with the `plotly.express.box` API + (https://plotly.com/python-api-reference/generated/plotly.express.box.html). + + It includes key parameters for data mapping, styling, and layout. It ensures + that user-provided configurations are type-safe and adhere to the expected + structure. The plotting function handles parameters defined here, and also + accepts additional Plotly keyword arguments, forwarding them to the + appropriate `plotly.express.box` or `plotly.graph_objects.Figure` call. + + Attributes + ---------- + -----Data Mapping----- + x : Optional[str] + Column for the x-axis values. + y : Optional[str] + Column for the y-axis values. + color : Optional[str] + Column to assign color to box plots. + hover_name : Optional[str] + Column to appear in bold in the hover tooltip. + hover_data : List[str] + Additional columns to display in the hover tooltip. + facet_row : Optional[str] + Column to create vertical subplots (facets). 
+ facet_col : Optional[str] + Column to create horizontal subplots (facets). + labels : Optional[Dict[str, str]] + Dictionary to override column names for titles, legends, etc. + color_discrete_map : Optional[Dict[str, str]] + Specific color mappings for values in the `color` column. + category_orders : Optional[Dict[str, List[str]]] + Dictionary to specify the order of categorical values. + -----Styling and Layout----- + orientation: str + Orientation of the box plots ('v' for vertical, 'h' for horizontal). + boxmode : str + Mode for grouping boxes ('group' or 'overlay'). + log_x : bool + If True, the x-axis is log-scaled. + log_y : bool + If True, the y-axis is log-scaled. + range_x : Optional[List[float]] + Range for the x-axis, e.g., [0, 100]. + range_y : Optional[List[float]] + Range for the y-axis, e.g., [0, 100]. + notched : bool + If True, boxes are drawn with notches. + points : str + Method to display sample points ('outliers', 'all', 'suspectedoutliers', False). + title : str + The main title of the plot. + x_title : Optional[str] + Custom title for the x-axis. + y_title : Optional[str] + Custom title for the y-axis. + subtitle : Optional[str] + The subtitle of the plot. + template : str + Plotly template for styling (e.g., 'plotly_white'). + width : int + Width of the plot in pixels. + height : int + Height of the plot in pixels. + """ + + # General Configuration + # Allow extra parameters to pass through to Plotly + model_config = ConfigDict(extra="allow") + + # Data Mapping + x: Optional[str] = Field(None, description="Column for x-axis values.") + y: Optional[str] = Field(None, description="Column for y-axis values.") + color: Optional[str] = Field(None, description="Column to assign color to boxes.") + hover_name: Optional[str] = Field( + None, description="Column for bold text in hover tooltip." + ) + hover_data: List[str] = Field( + [], description="Additional columns for the hover tooltip." 
+ ) + facet_row: Optional[str] = Field( + None, description="Column to create vertical subplots." + ) + facet_col: Optional[str] = Field( + None, description="Column to create horizontal subplots." + ) + labels: Optional[Dict[str, str]] = Field( + None, description="Override column names in the plot." + ) + color_discrete_map: Optional[Dict[str, str]] = Field( + None, description="Map values to specific colors." + ) + category_orders: Optional[Dict[str, List[str]]] = Field( + None, description="Dictionary to specify the order of categorical values." + ) + + # Styling and Layout + orientation: Optional[str] = Field( + None, + description="Orientation of the box plots ('v' for vertical, 'h' for horizontal).", + ) + boxmode: str = Field("group", description="Mode for grouping boxes.") + log_x: bool = Field(False, description="If True, use a logarithmic x-axis.") + log_y: bool = Field(False, description="If True, use a logarithmic y-axis.") + range_x: Optional[List[float]] = Field( + None, description="Range for the x-axis, e.g., [0, 100]." + ) + range_y: Optional[List[float]] = Field( + None, description="Range for the y-axis, e.g., [0, 100]." 
+ ) + notched: bool = Field(False, description="If True, boxes are drawn with notches.") + points: str = Field( + "outliers", + description="Method to display sample points ('outliers', 'all', 'suspectedoutliers', False).", + ) + title: str = Field("Box Plot", description="The main title of the plot.") + x_title: Optional[str] = Field(None, description="Custom title for the x-axis.") + y_title: Optional[str] = Field(None, description="Custom title for the y-axis.") + subtitle: Optional[str] = Field(None, description="The subtitle of the plot.") + template: str = Field("plotly_white", description="Plotly template for styling.") + width: Optional[int] = Field(800, description="Width of the plot in pixels.") + height: Optional[int] = Field(600, description="Height of the plot in pixels.") From 1af13539335428e987f61e15264232c5d164d41f Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Thu, 21 Aug 2025 12:21:27 +0200 Subject: [PATCH 02/10] =?UTF-8?q?=E2=9C=A8=20Feat(vuecore/engines/plotly/b?= =?UTF-8?q?ox.py):=20Create=20script=20with=20build=20function=20for=20box?= =?UTF-8?q?=20plot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/vuecore/engines/plotly/box.py | 68 +++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/vuecore/engines/plotly/box.py diff --git a/src/vuecore/engines/plotly/box.py b/src/vuecore/engines/plotly/box.py new file mode 100644 index 0000000..c7b2ffe --- /dev/null +++ b/src/vuecore/engines/plotly/box.py @@ -0,0 +1,68 @@ +# vuecore/engines/plotly/box.py + +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go + +from vuecore.schemas.basic.box import BoxConfig +from .theming import apply_box_theme + + +def build(data: pd.DataFrame, config: BoxConfig) -> go.Figure: + """ + Creates a Plotly box plot figure from a DataFrame and a Pydantic configuration. 
+ + This function acts as a bridge between the abstract plot definition and the + Plotly Express implementation. It translates the validated `BoxConfig` + into the arguments for `plotly.express.box` and also forwards any + additional, unvalidated keyword arguments from Plotly. The resulting figure + is then customized with layout and theme settings using `plotly.graph_objects`. + (https://plotly.com/python-api-reference/generated/plotly.express.box.html). + + Parameters + ---------- + data : pd.DataFrame + The DataFrame containing the plot data. + config : BoxConfig + The validated Pydantic model with all plot configurations. + + Returns + ------- + go.Figure + A `plotly.graph_objects.Figure` object representing the box plot. + """ + # Get all parameters from the config model, including extras + all_config_params = config.model_dump() + + # Define parameters handled by the theme script + theming_params = [ + "boxmode", + "log_x", + "log_y", + "range_x", + "range_y", + "notched", + "points", + "title", + "x_title", + "y_title", + "subtitle", + "template", + "width", + "height", + ] + + # Create the dictionary of arguments for px.box + plot_args = { + k: v + for k, v in all_config_params.items() + if k not in theming_params and v is not None + } + + # Create the base figure using only the arguments relevant to px.box + fig = px.box(data, **plot_args) + + # Apply theme and additional styling to the generated figure. 
+ fig = apply_box_theme(fig, config) + + return fig From 42a3aecd79b7d76f5316b80d9470ffb08425544e Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Thu, 21 Aug 2025 12:28:57 +0200 Subject: [PATCH 03/10] =?UTF-8?q?=E2=9C=A8=20Feat(vuecore/engines/plotly/t?= =?UTF-8?q?heming.py):=20Add=20apply=5Fbox=5Ftheme=20function=20to=20the?= =?UTF-8?q?=20script?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/vuecore/engines/plotly/theming.py | 56 +++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/vuecore/engines/plotly/theming.py b/src/vuecore/engines/plotly/theming.py index cc99df7..6243eb2 100644 --- a/src/vuecore/engines/plotly/theming.py +++ b/src/vuecore/engines/plotly/theming.py @@ -3,6 +3,7 @@ from vuecore.schemas.basic.scatter import ScatterConfig from vuecore.schemas.basic.line import LineConfig from vuecore.schemas.basic.bar import BarConfig +from vuecore.schemas.basic.box import BoxConfig def apply_scatter_theme(fig: go.Figure, config: ScatterConfig) -> go.Figure: @@ -157,3 +158,58 @@ def apply_bar_theme(fig: go.Figure, config: BarConfig) -> go.Figure: barmode=config.barmode, ) return fig + + +def apply_box_theme(fig: go.Figure, config: BoxConfig) -> go.Figure: + """ + Applies a consistent layout and theme to a Plotly box plot. + + This function handles all styling and layout adjustments, such as titles, + dimensions, templates, and trace properties, separating these concerns + from the initial data mapping. + + Parameters + ---------- + fig : go.Figure + The Plotly figure object to be styled. + config : BoxConfig + The configuration object containing all styling and layout info. + + Returns + ------- + go.Figure + The styled Plotly figure object. 
+ """ + # Apply trace-specific updates for box plots + fig.update_traces( + boxpoints=config.points, notched=config.notched, selector=dict(type="box") + ) + + # Use the labels dictionary to set axis titles, falling back to defaults + x_title = config.x_title or ( + config.labels.get(config.x) + if config.x and config.labels + else None or (config.x.title() if config.x else None) + ) + y_title = config.y_title or ( + config.labels.get(config.y) + if config.y and config.labels + else None or (config.y.title() if config.y else None) + ) + + # Apply layout updates for box plot + fig.update_layout( + title_text=config.title, + title_subtitle_text=config.subtitle, + xaxis_title=x_title, + yaxis_title=y_title, + height=config.height, + width=config.width, + template=config.template, + xaxis_type="log" if config.log_x else None, + yaxis_type="log" if config.log_y else None, + xaxis_range=config.range_x, + yaxis_range=config.range_y, + boxmode=config.boxmode, + ) + return fig From 4a6ba25be9037c1081e2e1326421cc25b2b699ff Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Thu, 21 Aug 2025 12:36:19 +0200 Subject: [PATCH 04/10] =?UTF-8?q?=E2=9C=A8=20Feat(vuecore/engines/plotly/?= =?UTF-8?q?=5F=5Finit=5F=5F.py):=20Register=20box=20plot=20builder=20and?= =?UTF-8?q?=20add=20it=20to=20the=20PlotType=20StrEnum?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/vuecore/constants.py | 1 + src/vuecore/engines/plotly/__init__.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/vuecore/constants.py b/src/vuecore/constants.py index 2c2003e..6a45481 100644 --- a/src/vuecore/constants.py +++ b/src/vuecore/constants.py @@ -13,6 +13,7 @@ class PlotType(StrEnum): SCATTER = auto() LINE = auto() BAR = auto() + BOX = auto() class EngineType(StrEnum): diff --git a/src/vuecore/engines/plotly/__init__.py b/src/vuecore/engines/plotly/__init__.py index 0b3d7ab..eb8088e 100644 --- a/src/vuecore/engines/plotly/__init__.py +++ 
b/src/vuecore/engines/plotly/__init__.py @@ -4,6 +4,7 @@ from .scatter import build as build_scatter from .line import build as build_line from .bar import build as build_bar +from .box import build as build_box from .saver import save # Register the functions with the central dispatcher @@ -12,5 +13,6 @@ ) register_builder(plot_type=PlotType.LINE, engine=EngineType.PLOTLY, func=build_line) register_builder(plot_type=PlotType.BAR, engine=EngineType.PLOTLY, func=build_bar) +register_builder(plot_type=PlotType.BOX, engine=EngineType.PLOTLY, func=build_box) register_saver(engine=EngineType.PLOTLY, func=save) From 8403784beb52b9c182231b4cc2fe9e056e8fb8ab Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Thu, 21 Aug 2025 12:48:32 +0200 Subject: [PATCH 05/10] =?UTF-8?q?=E2=9C=A8=20Feat(vuecore/plots/box.py):?= =?UTF-8?q?=20Create=20script=20with=20the=20user-facing=20function=20for?= =?UTF-8?q?=20the=20box=20plot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/vuecore/plots/basic/box.py | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/vuecore/plots/basic/box.py diff --git a/src/vuecore/plots/basic/box.py b/src/vuecore/plots/basic/box.py new file mode 100644 index 0000000..5d0126a --- /dev/null +++ b/src/vuecore/plots/basic/box.py @@ -0,0 +1,83 @@ +from typing import Any + +import pandas as pd + +from vuecore import EngineType +from vuecore.engines import get_builder, get_saver +from vuecore.schemas.basic.box import BoxConfig + + +def create_box_plot( + data: pd.DataFrame, + engine: EngineType = EngineType.PLOTLY, + file_path: str = None, + **kwargs, +) -> Any: + """ + Creates, styles, and optionally saves a box plot using the specified engine. + + This function serves as the main entry point for users to generate box plots. 
+ It validates the provided configuration against the BoxConfig schema, + retrieves the appropriate plotting builder and saver functions based on the + selected engine, builds the plot, and optionally saves it to a file. + + Parameters + ---------- + data : pd.DataFrame + The DataFrame containing the data to be plotted. Each row represents + an observation, and columns correspond to variables. + engine : EngineType, optional + The plotting engine to use for rendering the plot. + Defaults to `EngineType.PLOTLY`. + file_path : str, optional + If provided, the path where the final plot will be saved. + The file format is automatically inferred from the file extension + (e.g., '.html', '.png', '.jpeg', '.svg'). Defaults to None, meaning + the plot will not be saved. + **kwargs + Keyword arguments for plot configuration. These arguments are + validated against the `BoxConfig` Pydantic model. Refer to + `vuecore.schemas.basic.box.BoxConfig` for all available + options and their descriptions. + + Returns + ------- + Any + The final plot object returned by the selected engine. + For Plotly, this will typically be a `plotly.graph_objects.Figure`. + The exact type depends on the chosen engine. + + Raises + ------ + pydantic.ValidationError + If the provided keyword arguments do not conform to the `BoxConfig` schema. + e.g., a required parameter is missing or a value has an incorrect type. + ValueError + Raised by the plotting engine (e.g., Plotly Express) if a + column specified in the configuration (e.g., 'x', 'y', 'color') is + not found in the provided DataFrame. + + Examples + -------- + For detailed examples and usage, please refer to the documentation: + + * **Jupyter Notebook:** `docs/api_examples/box_plot.ipynb` - + https://vuecore.readthedocs.io/en/latest/api_examples/box_plot.html + * **Python Script:** `docs/api_examples/box_plot.py` - + https://github.com/Multiomics-Analytics-Group/vuecore/blob/main/docs/api_examples/box_plot.py + """ + # 1. 
Validate configuration using Pydantic. + config = BoxConfig(**kwargs) + + # 2. Get the correct builder function from the registry. + builder_func = get_builder(plot_type="box", engine=engine) + + # 3. Build the figure object. + figure = builder_func(data, config) + + # 4. Save the plot using the correct saver function, if a file_path is provided. + if file_path: + saver_func = get_saver(engine=engine) + saver_func(figure, file_path) + + return figure From f2c0f38a6f2515a4c10c200b680a1700fa4ec7bb Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Thu, 21 Aug 2025 13:51:00 +0200 Subject: [PATCH 06/10] =?UTF-8?q?=E2=9C=A8=20Feat(docs/api=5Fexamples/box?= =?UTF-8?q?=5Fplot.ipynb):=20Create=20notebook=20api=20example=20for=20box?= =?UTF-8?q?=20plot=20and=20sync=20it=20with=20a=20python=20script?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/api_examples/box_plot.ipynb | 4326 ++++++++++++++++++++++++++++++ docs/api_examples/box_plot.py | 163 ++ src/vuecore/plots/basic/box.py | 88 +- 3 files changed, 4533 insertions(+), 44 deletions(-) create mode 100644 docs/api_examples/box_plot.ipynb create mode 100644 docs/api_examples/box_plot.py diff --git a/docs/api_examples/box_plot.ipynb b/docs/api_examples/box_plot.ipynb new file mode 100644 index 0000000..a30132d --- /dev/null +++ b/docs/api_examples/box_plot.ipynb @@ -0,0 +1,4326 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "640ba1d5", + "metadata": {}, + "source": [ + "# Box Plot\n", + "\n", + "![VueCore logo][vuecore_logo]\n", + "\n", + "[![Open In Colab][colab_badge]][colab_link]\n", + "\n", + "[VueCore][vuecore_repo] is a Python package for creating interactive and static visualizations of multi-omics data.\n", + "It is part of a broader ecosystem of tools—including [ACore][acore_repo] for data processing and [VueGen][vuegen_repo] for automated reporting—that together enable end-to-end workflows for omics analysis.\n", + "\n", + "This notebook demonstrates 
how to generate box plots using plotting functions from VueCore. We showcase basic and advanced plot configurations, highlighting key customization options such as grouping, color mapping, text annotations, and export to multiple file formats.\n", + "\n", + "## Notebook structure\n", + "\n", + "First, we will set up the work environment by installing the necessary packages and importing the required libraries. Next, we will create basic and advanced box plots.\n", + "\n", + "0. [Work environment setup](#0-work-environment-setup)\n", + "1. [Basic box plot](#1-basic-box-plot)\n", + "2. [Advanced box plot](#2-advanced-box-plot)\n", + "\n", + "## Credits and Contributors\n", + "- This notebook was created by Sebastián Ayala-Ruano under the supervision of Henry Webel and Alberto Santos, head of the [Multiomics Network Analytics Group (MoNA)][Mona] at the [Novo Nordisk Foundation Center for Biosustainability (DTU Biosustain)][Biosustain].\n", + "- You can find more details about the project in this [GitHub repository][vuecore_repo].\n", + "\n", + "[colab_badge]: https://colab.research.google.com/assets/colab-badge.svg\n", + "[colab_link]: https://colab.research.google.com/github/Multiomics-Analytics-Group/vuecore/blob/main/docs/api_examples/box_plot.ipynb\n", + "[vuecore_logo]: https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuecore/main/docs/images/logo/vuecore_logo.svg\n", + "[Mona]: https://multiomics-analytics-group.github.io/\n", + "[Biosustain]: https://www.biosustain.dtu.dk/\n", + "[vuecore_repo]: https://github.com/Multiomics-Analytics-Group/vuecore\n", + "[vuegen_repo]: https://github.com/Multiomics-Analytics-Group/vuegen\n", + "[acore_repo]: https://github.com/Multiomics-Analytics-Group/acore" + ] + }, + { + "cell_type": "markdown", + "id": "3b504dfb", + "metadata": {}, + "source": [ + "## 0. Work environment setup" + ] + }, + { + "cell_type": "markdown", + "id": "f0c056a7", + "metadata": {}, + "source": [ + "### 0.1. 
Installing libraries and creating global variables for platform and working directory\n", + "\n", + "To run this notebook locally, you should create a virtual environment with the required libraries. If you are running this notebook on Google Colab, everything should be set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36246ed6", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "# VueCore library\n", + "%pip install vuecore" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "963a9529", + "metadata": { + "tags": [ + "hide-cell" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "IN_COLAB = \"COLAB_GPU\" in os.environ" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ee2ffd40", + "metadata": { + "tags": [ + "hide-cell" + ] + }, + "outputs": [], + "source": [ + "# Create a directory for outputs\n", + "output_dir = \"./outputs\"\n", + "os.makedirs(output_dir, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "id": "31638f9a", + "metadata": {}, + "source": [ + "### 0.2. Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "06dbf6a2", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "import pandas as pd\n", + "import numpy as np\n", + "from pathlib import Path\n", + "\n", + "from vuecore.plots.basic.box import create_box_plot" + ] + }, + { + "cell_type": "markdown", + "id": "5cc60050", + "metadata": {}, + "source": [ + "### 0.3. Create sample data\n", + "We create a synthetic dataset simulating gene expression levels across different patient samples and treatment conditions, with each data point representing a unique gene's expression level under a specific treatment for a particular patient." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ac2db647", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Sample_IDTreatmentGene_IDExpression
0Patient CControlGene_64685.330321
1Patient CTreatedGene_340121.514173
2Patient CControlGene_79897.711525
3Patient CControlGene_34896.559195
4Patient CTreatedGene_385131.128275
\n", + "
" + ], + "text/plain": [ + " Sample_ID Treatment Gene_ID Expression\n", + "0 Patient C Control Gene_646 85.330321\n", + "1 Patient C Treated Gene_340 121.514173\n", + "2 Patient C Control Gene_798 97.711525\n", + "3 Patient C Control Gene_348 96.559195\n", + "4 Patient C Treated Gene_385 131.128275" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set a random seed for reproducibility of the synthetic data\n", + "np.random.seed(42)\n", + "\n", + "# Define number of samples, sample groups, and treatments\n", + "num_samples = 200\n", + "sample_groups = [\"Patient A\", \"Patient B\", \"Patient C\", \"Patient D\"]\n", + "treatments = [\"Control\", \"Treated\"]\n", + "\n", + "# Generate synthetic gene expression data\n", + "records = []\n", + "for sample_group in np.random.choice(sample_groups, num_samples):\n", + " for treatment in np.random.choice(treatments, num_samples // (len(sample_groups) * len(treatments))):\n", + " base_expression = np.random.normal(loc=100, scale=10)\n", + " expression = base_expression + (np.random.normal(loc=20, scale=5) if treatment == \"Treated\" else 0)\n", + " records.append({\n", + " \"Sample_ID\": sample_group,\n", + " \"Treatment\": treatment,\n", + " \"Gene_ID\": f\"Gene_{np.random.randint(1, 1000)}\",\n", + " \"Expression\": expression\n", + " })\n", + "\n", + "gene_exp_df = pd.DataFrame(records)\n", + "gene_exp_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "ade445fe", + "metadata": {}, + "source": [ + "## 1. Basic Box Plot\n", + "A basic box plot can be created by simply providing the `x` and `y` columns from the DataFrame, along with style options like `title`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d0d34455", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[VueCore] Plot saved to outputs/box_plot_basic.png\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Define output file path for the PNG plot\n", + "file_path_basic_png = Path(output_dir) / \"box_plot_basic.png\"\n", + "\n", + "# Generate the basic box plot\n", + "box_plot_basic = create_box_plot(\n", + " data=gene_exp_df,\n", + " x=\"Treatment\",\n", + " y=\"Expression\",\n", + " title=\"Gene Expression Levels by Treatment\",\n", + " file_path=file_path_basic_png,\n", + ")\n", + "\n", + "box_plot_basic.show()" + ] + }, + { + "cell_type": "markdown", + "id": "f5e16637", + "metadata": {}, + "source": [ + "## 2. Advanced Box Plot\n", + "Here is an example of an advanced box plot with more descriptive parameters, including `color and box grouping`, `text annotations`, `hover tooltips`, and export to `HTML`." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "358e45fe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[VueCore] Plot saved to outputs/box_plot_advanced.html\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Define the output file path for the advanced HTML plot\n", + "file_path_adv_html = Path(output_dir) / \"box_plot_advanced.html\"\n", + "\n", + "# Generate the advanced box plot\n", + "box_plot_adv = create_box_plot(\n", + " data=gene_exp_df,\n", + " x=\"Treatment\",\n", + " y=\"Expression\",\n", + " color=\"Sample_ID\",\n", + " boxmode=\"group\",\n", + " notched=True,\n", + " title=\"Gene Expression Levels with Control and Treatment Condition\",\n", + " subtitle=\"Distribution of gene expression across different treatments and patient samples\",\n", + " labels={\n", + " \"Treatment\": \"Treatment\",\n", + " \"Expression\": \"Gene Expression\",\n", + " \"Sample_ID\": \"Patient Sample ID\",\n", + " },\n", + " color_discrete_map={\n", + " \"Patient A\": \"#508AA8\",\n", + " \"Patient B\": \"#A8505E\",\n", + " \"Patient C\": \"#86BF84\",\n", + " \"Patient D\": \"#A776AF\",\n", + " },\n", + " category_orders={\"Sample_ID\": [\"Patient A\", \"Patient B\", \"Patient C\", \"Patient D\"]},\n", + " hover_data=[\"Gene_ID\"],\n", + " file_path=file_path_adv_html,\n", + ")\n", + "\n", + "box_plot_adv.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "vuecore-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/api_examples/box_plot.py b/docs/api_examples/box_plot.py new file mode 100644 index 0000000..f5dc6b6 --- /dev/null +++ b/docs/api_examples/box_plot.py @@ -0,0 +1,163 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.2 +# kernelspec: +# 
display_name: vuecore-dev +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Box Plot +# +# ![VueCore logo][vuecore_logo] +# +# [![Open In Colab][colab_badge]][colab_link] +# +# [VueCore][vuecore_repo] is a Python package for creating interactive and static visualizations of multi-omics data. +# It is part of a broader ecosystem of tools—including [ACore][acore_repo] for data processing and [VueGen][vuegen_repo] for automated reporting—that together enable end-to-end workflows for omics analysis. +# +# This notebook demonstrates how to generate box plots using plotting functions from VueCore. We showcase basic and advanced plot configurations, highlighting key customization options such as grouping, color mapping, text annotations, and export to multiple file formats. +# +# ## Notebook structure +# +# First, we will set up the work environment by installing the necessary packages and importing the required libraries. Next, we will create basic and advanced box plots. +# +# 0. [Work environment setup](#0-work-environment-setup) +# 1. [Basic box plot](#1-basic-box-plot) +# 2. [Advanced box plot](#2-advanced-box-plot) +# +# ## Credits and Contributors +# - This notebook was created by Sebastián Ayala-Ruano under the supervision of Henry Webel and Alberto Santos, head of the [Multiomics Network Analytics Group (MoNA)][Mona] at the [Novo Nordisk Foundation Center for Biosustainability (DTU Biosustain)][Biosustain]. +# - You can find more details about the project in this [GitHub repository][vuecore_repo]. 
+# +# [colab_badge]: https://colab.research.google.com/assets/colab-badge.svg +# [colab_link]: https://colab.research.google.com/github/Multiomics-Analytics-Group/vuecore/blob/main/docs/api_examples/box_plot.ipynb +# [vuecore_logo]: https://raw.githubusercontent.com/Multiomics-Analytics-Group/vuecore/main/docs/images/logo/vuecore_logo.svg +# [Mona]: https://multiomics-analytics-group.github.io/ +# [Biosustain]: https://www.biosustain.dtu.dk/ +# [vuecore_repo]: https://github.com/Multiomics-Analytics-Group/vuecore +# [vuegen_repo]: https://github.com/Multiomics-Analytics-Group/vuegen +# [acore_repo]: https://github.com/Multiomics-Analytics-Group/acore + +# %% [markdown] +# ## 0. Work environment setup + +# %% [markdown] +# ### 0.1. Installing libraries and creating global variables for platform and working directory +# +# To run this notebook locally, you should create a virtual environment with the required libraries. If you are running this notebook on Google Colab, everything should be set. + +# %% tags=["hide-output"] +# VueCore library +# %pip install vuecore + +# %% tags=["hide-cell"] +import os + +IN_COLAB = "COLAB_GPU" in os.environ + +# %% tags=["hide-cell"] +# Create a directory for outputs +output_dir = "./outputs" +os.makedirs(output_dir, exist_ok=True) + +# %% [markdown] +# ### 0.2. Importing libraries + +# %% +# Imports +import pandas as pd +import numpy as np +from pathlib import Path + +from vuecore.plots.basic.box import create_box_plot + +# %% [markdown] +# ### 0.3. Create sample data +# We create a synthetic dataset simulating gene expression levels across different patient samples and treatment conditions, with each data point representing a unique gene's expression level under a specific treatment for a particular patient. 
+ +# %% +# Set a random seed for reproducibility of the synthetic data +np.random.seed(42) + +# Define number of samples, sample groups, and treatments +num_samples = 200 +sample_groups = ["Patient A", "Patient B", "Patient C", "Patient D"] +treatments = ["Control", "Treated"] + +# Generate synthetic gene expression data +records = [] +for sample_group in np.random.choice(sample_groups, num_samples): + for treatment in np.random.choice(treatments, num_samples // (len(sample_groups) * len(treatments))): + base_expression = np.random.normal(loc=100, scale=10) + expression = base_expression + (np.random.normal(loc=20, scale=5) if treatment == "Treated" else 0) + records.append({ + "Sample_ID": sample_group, + "Treatment": treatment, + "Gene_ID": f"Gene_{np.random.randint(1, 1000)}", + "Expression": expression + }) + +gene_exp_df = pd.DataFrame(records) +gene_exp_df.head() + +# %% [markdown] +# ## 1. Basic Box Plot +# A basic box plot can be created by simply providing the `x` and `y` columns from the DataFrame, along with style options like `title`. + +# %% +# Define output file path for the PNG plot +file_path_basic_png = Path(output_dir) / "box_plot_basic.png" + +# Generate the basic box plot +box_plot_basic = create_box_plot( + data=gene_exp_df, + x="Treatment", + y="Expression", + title="Gene Expression Levels by Treatment", + file_path=file_path_basic_png, +) + +box_plot_basic.show() + +# %% [markdown] +# ## 2. Advanced Box Plot +# Here is an example of an advanced box plot with more descriptive parameters, including `color and box grouping`, `text annotations`, `hover tooltips`, and export to `HTML`. 
+ +# %% +# Define the output file path for the advanced HTML plot +file_path_adv_html = Path(output_dir) / "box_plot_advanced.html" + +# Generate the advanced box plot +box_plot_adv = create_box_plot( + data=gene_exp_df, + x="Treatment", + y="Expression", + color="Sample_ID", + boxmode="group", + notched=True, + title="Gene Expression Levels with Control and Treatment Condition", + subtitle="Distribution of gene expression across different treatments and patient samples", + labels={ + "Treatment": "Treatment", + "Expression": "Gene Expression", + "Sample_ID": "Patient Sample ID", + }, + color_discrete_map={ + "Patient A": "#508AA8", + "Patient B": "#A8505E", + "Patient C": "#86BF84", + "Patient D": "#A776AF", + }, + category_orders={"Sample_ID": ["Patient A", "Patient B", "Patient C", "Patient D"]}, + hover_data=["Gene_ID"], + file_path=file_path_adv_html, +) + +box_plot_adv.show() diff --git a/src/vuecore/plots/basic/box.py b/src/vuecore/plots/basic/box.py index 5d0126a..f2e0359 100644 --- a/src/vuecore/plots/basic/box.py +++ b/src/vuecore/plots/basic/box.py @@ -14,57 +14,57 @@ def create_box_plot( **kwargs, ) -> Any: """ - Creates, styles, and optionally saves a box plot using the specified engine. + Creates, styles, and optionally saves a box plot using the specified engine. - This function serves as the main entry point for users to generate box plots. - It validates the provided configuration against the BoxConfig schema, - retrieves the appropriate plotting builder and saver functions based on the - selected engine, builds the plot, and optionally saves it to a file. + This function serves as the main entry point for users to generate box plots. + It validates the provided configuration against the BoxConfig schema, + retrieves the appropriate plotting builder and saver functions based on the + selected engine, builds the plot, and optionally saves it to a file. - Parameters - ---------- - data : pd.DataFrame - The DataFrame containing the data to be plotted. 
Each row represents - an observation, and columns correspond to variables. - engine : EngineType, optional - The plotting engine to use for rendering the plot. - Defaults to `EngineType.PLOTLY`. - file_path : str, optional - If provided, the path where the final plot will be saved. - The file format is automatically inferred from the file extension - (e.g., '.html', '.png', '.jpeg', '.svg'). Defaults to None, meaning - the plot will not be saved. - **kwargs - Keyword arguments for plot configuration. These arguments are - validated against the `BoxConfig` Pydantic model. Refer to - `vuecore.schemas.basic.box.BoxConfig` for all available - options and their descriptions. + Parameters + ---------- + data : pd.DataFrame + The DataFrame containing the data to be plotted. Each row represents + an observation, and columns correspond to variables. + engine : EngineType, optional + The plotting engine to use for rendering the plot. + Defaults to `EngineType.PLOTLY`. + file_path : str, optional + If provided, the path where the final plot will be saved. + The file format is automatically inferred from the file extension + (e.g., '.html', '.png', '.jpeg', '.svg'). Defaults to None, meaning + the plot will not be saved. + **kwargs + Keyword arguments for plot configuration. These arguments are + validated against the `BoxConfig` Pydantic model. Refer to + `vuecore.schemas.basic.box.BoxConfig` for all available + options and their descriptions. - Returns - ------- - Any - The final plot object returned by the selected engine. - For Plotly, this will typically be a `plotly.graph_objects.Figure`. - The exact type depends on the chosen engine. + Returns + ------- + Any + The final plot object returned by the selected engine. + For Plotly, this will typically be a `plotly.graph_objects.Figure`. + The exact type depends on the chosen engine. - Raises - ------ - pydantic.ValidationError - If the provided keyword arguments do not conform to the `BoxConfig` schema. 
- e.g., a required parameter is missing or a value has an incorrect type. - ValueError - Raised by the plotting engine (e.g., Plotly Express) if a - column specified in the configuration (e.g., 'x', 'y', 'color') is - not found in the provided DataFrame. + Raises + ------ + pydantic.ValidationError + If the provided keyword arguments do not conform to the `BoxConfig` schema. + e.g., a required parameter is missing or a value has an incorrect type. + ValueError + Raised by the plotting engine (e.g., Plotly Express) if a + column specified in the configuration (e.g., 'x', 'y', 'color') is + not found in the provided DataFrame. - Examples - -------- - For detailed examples and usage, please refer to the documentation: + Examples + -------- + For detailed examples and usage, please refer to the documentation: * **Jupyter Notebook:** `docs/api_examples/box_plot.ipynb` - - https://vuecore.readthedocs.io/en/latest/api_examples/box_plot.html - * **Python Script:** `docs/api_examples/box_plot.py` - - https://github.com/Multiomics-Analytics-Group/vuecore/blob/main/docs/api_examples/box_plot.py + https://vuecore.readthedocs.io/en/latest/api_examples/box_plot.html + * **Python Script:** `docs/api_examples/box_plot.py` - + https://github.com/Multiomics-Analytics-Group/vuecore/blob/main/docs/api_examples/box_plot.py """ # 1. Validate configuration using Pydantic. 
config = BoxConfig(**kwargs) From 748762600d8c2051d2022f3c5c9f5b37e1be92d1 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Thu, 21 Aug 2025 13:52:03 +0200 Subject: [PATCH 07/10] =?UTF-8?q?=F0=9F=93=9D=20Docs:=20update=20index.md?= =?UTF-8?q?=20to=20add=20box=20plot=20example?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.md b/docs/index.md index 313747e..773f1b7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,6 +14,7 @@ api_examples/scatter_plot api_examples/line_plot api_examples/bar_plot +api_examples/box_plot ``` ```{toctree} From 9ef7a027760a1785461d6082b2a857389decd7b8 Mon Sep 17 00:00:00 2001 From: sayalaruano Date: Thu, 21 Aug 2025 14:12:41 +0200 Subject: [PATCH 08/10] =?UTF-8?q?=F0=9F=93=9D=20Docs:=20update=20code=20to?= =?UTF-8?q?=20generate=20synthetic=20data=20on=20box=20plot=20example?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/api_examples/box_plot.ipynb | 4032 +----------------------------- docs/api_examples/box_plot.py | 45 +- 2 files changed, 65 insertions(+), 4012 deletions(-) diff --git a/docs/api_examples/box_plot.ipynb b/docs/api_examples/box_plot.ipynb index a30132d..00dcb25 100644 --- a/docs/api_examples/box_plot.ipynb +++ b/docs/api_examples/box_plot.ipynb @@ -137,115 +137,47 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 47, "id": "ac2db647", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Sample_IDTreatmentGene_IDExpression
0Patient CControlGene_64685.330321
1Patient CTreatedGene_340121.514173
2Patient CControlGene_79897.711525
3Patient CControlGene_34896.559195
4Patient CTreatedGene_385131.128275
\n", - "
" - ], - "text/plain": [ - " Sample_ID Treatment Gene_ID Expression\n", - "0 Patient C Control Gene_646 85.330321\n", - "1 Patient C Treated Gene_340 121.514173\n", - "2 Patient C Control Gene_798 97.711525\n", - "3 Patient C Control Gene_348 96.559195\n", - "4 Patient C Treated Gene_385 131.128275" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Set a random seed for reproducibility of the synthetic data\n", "np.random.seed(42)\n", "\n", - "# Define number of samples, sample groups, and treatments\n", + "# Parameters\n", "num_samples = 200\n", "sample_groups = [\"Patient A\", \"Patient B\", \"Patient C\", \"Patient D\"]\n", "treatments = [\"Control\", \"Treated\"]\n", "\n", - "# Generate synthetic gene expression data\n", - "records = []\n", - "for sample_group in np.random.choice(sample_groups, num_samples):\n", - " for treatment in np.random.choice(treatments, num_samples // (len(sample_groups) * len(treatments))):\n", - " base_expression = np.random.normal(loc=100, scale=10)\n", - " expression = base_expression + (np.random.normal(loc=20, scale=5) if treatment == \"Treated\" else 0)\n", - " records.append({\n", - " \"Sample_ID\": sample_group,\n", - " \"Treatment\": treatment,\n", - " \"Gene_ID\": f\"Gene_{np.random.randint(1, 1000)}\",\n", - " \"Expression\": expression\n", - " })\n", + "# Sample metadata\n", + "sample_ids = np.random.choice(sample_groups, size=num_samples)\n", + "treatment_assignments = np.random.choice(treatments, size=num_samples)\n", + "gene_ids = [f\"Gene_{g}\" for g in np.random.randint(1, 1500, size=num_samples)]\n", + "\n", + "# Base expression values\n", + "base_expr = np.random.normal(loc=100, scale=35, size=num_samples)\n", + "\n", + "# Treatment effect simulation\n", + "treatment_effect = np.where(\n", + " treatment_assignments == \"Treated\",\n", + " np.random.normal(loc=50, scale=30, size=num_samples),\n", + " 0\n", + ")\n", + "\n", + "# Small random 
per-gene offset for extra variability\n", + "gene_offset = np.random.normal(loc=0, scale=20, size=num_samples)\n", "\n", - "gene_exp_df = pd.DataFrame(records)\n", - "gene_exp_df.head()" + "# Final expression\n", + "expr = base_expr + treatment_effect + gene_offset\n", + "\n", + "# Construct DataFrame\n", + "gene_exp_df = pd.DataFrame({\n", + " \"Sample_ID\": sample_ids,\n", + " \"Treatment\": treatment_assignments,\n", + " \"Gene_ID\": gene_ids,\n", + " \"Expression\": expr\n", + "})" ] }, { @@ -259,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 48, "id": "d0d34455", "metadata": {}, "outputs": [ @@ -273,3901 +205,9 @@ { "data": { "text/html": [ - " \n", - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "