From 9e710f817986326c3e68b7f05f99d5c93bef61bb Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Fri, 25 Jul 2025 11:02:13 -0400 Subject: [PATCH 01/16] adding image dimensions to landscape parameters + checking duplicate genes --- src/celldega/pre/__init__.py | 46 +++++++++++++++++++++++--- src/celldega/pre/run_pre_processing.py | 7 +++- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/celldega/pre/__init__.py b/src/celldega/pre/__init__.py index d98f30e4..47206285 100644 --- a/src/celldega/pre/__init__.py +++ b/src/celldega/pre/__init__.py @@ -662,6 +662,40 @@ def _load_meta_cell_by_technology(technology, path_meta_cell_micron): return meta_cell +def _align_and_deduplicate_genes(cbg_custom: pd.DataFrame, path_landscape_files: str) -> pd.DataFrame: + """ + Ensures all genes from meta_gene.parquet are present in cbg_custom DataFrame. + Adds missing genes with value 0 and removes duplicate columns (keeps the first occurrence). + + Parameters: + ----------- + cbg_custom : pd.DataFrame + DataFrame containing custom cell-by-gene matrix. + path_landscape_files : str + Path to directory containing meta_gene.parquet. + + Returns: + -------- + pd.DataFrame + Cleaned cbg_custom DataFrame with all expected genes and no duplicate columns. + """ + meta_gene_path = Path(path_landscape_files) / "meta_gene.parquet" + meta_gene = pd.read_parquet(meta_gene_path) + + # Add missing gene columns with default value 0 + missing_cols = meta_gene.index.difference(cbg_custom.columns) + for col in missing_cols: + cbg_custom[col] = 0 + + # Optional: check for duplicates in meta_gene index + duplicated_genes = meta_gene.index[meta_gene.index.duplicated()].unique() + if not duplicated_genes.empty: + print(f"Warning: Duplicate genes found in meta_gene index: {list(duplicated_genes)}") + + # Remove duplicate columns from cbg_custom + cbg_custom = cbg_custom.loc[:, ~cbg_custom.columns.duplicated()] + + return cbg_custom def make_meta_cell_image_coord( technology, @@ -800,6 +834,8 @@ def get_max_zoom_level(path_image_pyramid): def save_landscape_parameters( technology, path_landscape_files, + image_width, + image_height, image_name="dapi_files", tile_size=1000, image_info=None, @@ -844,6 +880,7 @@ def save_landscape_parameters( "tile_size": "N.A.", "image_info": image_info, "image_format": image_format, + "image_dimensions": {'width': image_width, 'height': image_height}, "use_int_index": "N.A.", } elif technology != "custom": @@ -854,6 +891,7 @@ def save_landscape_parameters( "tile_size": tile_size, "image_info": image_info, "image_format": image_format, + "image_dimensions": {'width': image_width, 'height': image_height}, "use_int_index": use_int_index, } else: @@ -884,11 +922,7 @@ def add_custom_segmentation( cbg_custom = pd.read_parquet(Path(path_segmentation_files) / "cell_by_gene_matrix.parquet") - # make sure all genes are present in cbg_custom - meta_gene = pd.read_parquet(Path(path_landscape_files) / "meta_gene.parquet") - missing_cols = meta_gene.index.difference(cbg_custom.columns) - for col in missing_cols: - cbg_custom[col] = 0 + cbg_custom = _align_and_deduplicate_genes(cbg_custom, path_landscape_files) make_meta_gene( cbg=cbg_custom, @@ -959,6 +993,8 @@ def add_custom_segmentation( save_landscape_parameters( technology=segmentation_parameters["technology"], path_landscape_files=path_landscape_files, + image_width=width, + image_height=height, image_name="dapi_files", tile_size=tile_size, image_format=".webp", diff --git a/src/celldega/pre/run_pre_processing.py b/src/celldega/pre/run_pre_processing.py index b9c0a515..37076f56 100644 --- a/src/celldega/pre/run_pre_processing.py +++ b/src/celldega/pre/run_pre_processing.py @@ -196,6 +196,9 @@ def main( # Make meta gene files dega.pre.make_meta_gene(cbg, str(paths["meta_gene"])) + # Check if the genes are unique before saving the cbg files + cbg = dega.pre._align_and_deduplicate_genes(cbg, path_landscape_files) + # Save CBG gene parquet files dega.pre.save_cbg_gene_parquets(path_landscape_files, cbg, verbose=True) @@ -248,7 +251,9 @@ def main( dega.pre.save_landscape_parameters( technology, path_landscape_files, - "dapi_files", + image_width=tile_bounds['x_max'], + image_height=tile_bounds['y_max'], + image_name="dapi_files", tile_size=tile_size, image_info=dega.pre.get_image_info(technology, image_tile_layer), image_format=".webp", From 38c23d728e4159b43b3ebb6aa59a62e5c0b32023 Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Fri, 25 Jul 2025 12:40:49 -0400 Subject: [PATCH 02/16] removing technology argument from dega.viz.Landscape --- notebooks/Pre-processor_xenium_merscope.ipynb | 30 +++++-------------- src/celldega/viz/widget.py | 21 +++++++++++-- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/notebooks/Pre-processor_xenium_merscope.ipynb b/notebooks/Pre-processor_xenium_merscope.ipynb index bef7b3c1..f0ad4ae9 100644 --- a/notebooks/Pre-processor_xenium_merscope.ipynb +++ b/notebooks/Pre-processor_xenium_merscope.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "b81ab32e", "metadata": {}, "outputs": [ @@ -18,16 +18,10 @@ "name": "stdout", "output_type": "stream", "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n", "env: ANYWIDGET_HMR=1\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/feni/Documents/celldega/dega/lib/python3.12/site-packages/h5py/__init__.py:36: UserWarning: h5py is running against HDF5 1.14.5 when it was built against 1.14.6, this may cause problems\n", - " _warn((\"h5py is running against HDF5 {0} when it was built against {1}, \"\n" - ] } ], "source": [ @@ -152,36 +146,28 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "id": "4a1320eb", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Server running on port 60648\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fcd77f2a45b047ca81bf3af1a235bc1f", + "model_id": "2094d99ad409483cb5f74bff528fc511", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Landscape(base_url='http://localhost:60648/data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_test', tech…" + "Landscape(base_url='http://localhost:50952/data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_test', cell…" ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "landscape_ist = dega.viz.Landscape(\n", - " technology='Xenium',\n", " base_url = f\"http://localhost:{dega.viz.get_local_server()}/{path_landscape_files}\",\n", ")\n", "\n", @@ -368,7 +354,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.11.5" }, "toc": { "base_numbering": 1, diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 5e592ffa..efd3f1aa 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -101,6 +101,25 @@ class Landscape(anywidget.AnyWidget): height = traitlets.Int(800).tag(sync=True) def __init__(self, **kwargs): + + base_path = (kwargs.get("base_url") or "") + "/" + + if "technology" not in kwargs: + path_parameters_json = base_path + "landscape_parameters.json" + + try: + import urllib.request + with urllib.request.urlopen(path_parameters_json) as f: + landscape_parameters = json.load(f) + + kwargs["technology"] = landscape_parameters.get("technology", "sst") + except (FileNotFoundError, urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, TypeError) as e: + kwargs["technology"] = "sst" + warnings.warn( + f"Could not read technology from {path_parameters_json}. Using default 'sst'. Reason: {e}", + stacklevel=2, + ) + adata = kwargs.pop("adata", None) or kwargs.pop("AnnData", None) pq_meta_cell = kwargs.pop("meta_cell_parquet", None) pq_meta_cluster = kwargs.pop("meta_cluster_parquet", None) @@ -115,8 +134,6 @@ def __init__(self, **kwargs): meta_cluster_df = None cell_attr = kwargs.pop("cell_attr", ["leiden"]) - base_path = (kwargs.get("base_url") or "") + "/" - path_transformation_matrix = base_path + "micron_to_image_transform.csv" try: From c8d8a7a6b41b723fa0db82ad12d05292f00a8458 Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Fri, 25 Jul 2025 12:49:20 -0400 Subject: [PATCH 03/16] fixing custom segmentation notebook --- notebooks/Custom_Segmentation.ipynb | 174 +++++++--------------------- 1 file changed, 43 insertions(+), 131 deletions(-) diff --git a/notebooks/Custom_Segmentation.ipynb b/notebooks/Custom_Segmentation.ipynb index 0822baec..4161db3a 100644 --- a/notebooks/Custom_Segmentation.ipynb +++ b/notebooks/Custom_Segmentation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "id": "9bd79809-7286-463c-a92a-e6315856d0a1", "metadata": {}, "outputs": [ @@ -10,8 +10,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n", "env: ANYWIDGET_HMR=1\n" ] } @@ -24,17 +22,17 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "id": "236fbcbe-36de-483b-9afd-13a379a56899", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'0.4.0'" + "'0.9.0'" ] }, - "execution_count": 6, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -57,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "id": "46377171-5592-418f-a6a1-0658a8494954", "metadata": {}, "outputs": [], @@ -76,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "cb0602d3-c572-417b-88e3-ba036098f050", "metadata": {}, "outputs": [ @@ -90,126 +88,48 @@ "Calculating mean expression\n", "Calculating variance\n", "All meta gene files are succesfully saved.\n", - "data/xenium_landscape_files/Xenium_Prime_Human_Skin_FFPE_outs/cbg_cellpose2\n", "\n", - "========Write gene-specific parquet files========\n", - "Processing gene 0: A2ML1\n", - "Processing gene 100: ADIPOR1\n", - "Processing gene 200: ANKRD40\n", - "Processing gene 300: ATF2\n", - "Processing gene 400: BDNF\n", - "Processing gene 500: CALCRL\n", - "Processing gene 600: CCL20\n", - "Processing gene 700: CD72\n", - "Processing gene 800: CFC1\n", - "Processing gene 900: CNKSR3\n", - "Processing gene 1000: CSNK1A1\n", - "Processing gene 1100: CYTH2\n", - "Processing gene 1200: DMKN\n", - "Processing gene 1300: DeprecatedCodeword_0994\n", - "Processing gene 1400: DeprecatedCodeword_15617\n", - "Processing gene 1500: DeprecatedCodeword_4666\n", - "Processing gene 1600: ECD\n", - "Processing gene 1700: EPCAM\n", - "Processing gene 1800: FANCC\n", - "Processing gene 1900: FMO3\n", - "Processing gene 2000: GALNS\n", - "Processing gene 2100: GPATCH11\n", - "Processing gene 2200: H3F3B\n", - "Processing gene 2300: HOXB9\n", - "Processing gene 2400: IFNW1\n", - "Processing gene 2500: INCA1\n", - "Processing gene 2600: JAM2\n", - "Processing gene 2700: KIR3DL1\n", - "Processing gene 2800: LILRA6\n", - "Processing gene 2900: MALL\n", - "Processing gene 3000: MEST\n", - "Processing gene 3100: MTCH2\n", - "Processing gene 3200: NCSTN\n", - "Processing gene 3300: NORAD\n", - "Processing gene 3400: NXPH2\n", - "Processing gene 3500: NegControlCodeword_18746\n", - "Processing gene 3600: NegControlCodeword_18846\n", - "Processing gene 3700: NegControlCodeword_18946\n", - "Processing gene 3800: NegControlCodeword_19046\n", - "Processing gene 3900: NegControlCodeword_19146\n", - "Processing gene 4000: NegControlCodeword_19246\n", - "Processing gene 4100: P2RX1\n", - "Processing gene 4200: PDE6H\n", - "Processing gene 4300: PKIA\n", - "Processing gene 4400: PPARD\n", - "Processing gene 4500: PRXL2A\n", - "Processing gene 4600: RABL2B\n", - "Processing gene 4700: RGN\n", - "Processing gene 4800: RUBCN\n", - "Processing gene 4900: SERPINA9\n", - "Processing gene 5000: SLC17A8\n", - "Processing gene 5100: SMC1A\n", - "Processing gene 5200: SPATS2L\n", - "Processing gene 5300: STX7\n", - "Processing gene 5400: TENT5B\n", - "Processing gene 5500: TMEM130\n", - "Processing gene 5600: TPX2\n", - "Processing gene 5700: TUFM\n", - "Processing gene 5800: UnassignedCodeword_0100\n", - "Processing gene 5900: UnassignedCodeword_0579\n", - "Processing gene 6000: UnassignedCodeword_0998\n", - "Processing gene 6100: UnassignedCodeword_10427\n", - "Processing gene 6200: UnassignedCodeword_10846\n", - "Processing gene 6300: UnassignedCodeword_11242\n", - "Processing gene 6400: UnassignedCodeword_11661\n", - "Processing gene 6500: UnassignedCodeword_12151\n", - "Processing gene 6600: UnassignedCodeword_12526\n", - "Processing gene 6700: UnassignedCodeword_12880\n", - "Processing gene 6800: UnassignedCodeword_13294\n", - "Processing gene 6900: UnassignedCodeword_13728\n", - "Processing gene 7000: UnassignedCodeword_14115\n", - "Processing gene 7100: UnassignedCodeword_14486\n", - "Processing gene 7200: UnassignedCodeword_1487\n", - "Processing gene 7300: UnassignedCodeword_15237\n", - "Processing gene 7400: UnassignedCodeword_15662\n", - "Processing gene 7500: UnassignedCodeword_16071\n", - "Processing gene 7600: UnassignedCodeword_16476\n", - "Processing gene 7700: UnassignedCodeword_16830\n", - "Processing gene 7800: UnassignedCodeword_17241\n", - "Processing gene 7900: UnassignedCodeword_17672\n", - "Processing gene 8000: UnassignedCodeword_18196\n", - "Processing gene 8100: UnassignedCodeword_1886\n", - "Processing gene 8200: UnassignedCodeword_2384\n", - "Processing gene 8300: UnassignedCodeword_2846\n", - "Processing gene 8400: UnassignedCodeword_3332\n", - "Processing gene 8500: UnassignedCodeword_3845\n", - "Processing gene 8600: UnassignedCodeword_4262\n", - "Processing gene 8700: UnassignedCodeword_4726\n", - "Processing gene 8800: UnassignedCodeword_5195\n", - "Processing gene 8900: UnassignedCodeword_5693\n", - "Processing gene 9000: UnassignedCodeword_6174\n", - "Processing gene 9100: UnassignedCodeword_6629\n", - "Processing gene 9200: UnassignedCodeword_7161\n", - "Processing gene 9300: UnassignedCodeword_7569\n", - "Processing gene 9400: UnassignedCodeword_8046\n", - "Processing gene 9500: UnassignedCodeword_8534\n", - "Processing gene 9600: UnassignedCodeword_9011\n", - "Processing gene 9700: UnassignedCodeword_9525\n", - "Processing gene 9800: UnassignedCodeword_9939\n", - "Processing gene 9900: XPO1\n", - "Processing gene 10000: ZNF687\n", - "All gene-specific parquet files are succesfully saved.\n", + "========Make meta cells in pixel space========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jishar/Documents/celldega/src/celldega/pre/__init__.py:656: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n", "\n", - "========Make meta cells in pixel space========\n", + " meta_cell[\"center_x\"] = meta_cell.centroid.x\n", + "/Users/jishar/Documents/celldega/src/celldega/pre/__init__.py:657: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n", + "\n", + " meta_cell[\"center_y\"] = meta_cell.centroid.y\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Done.\n", + "data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_test/cbg_proseg\n", + "Processing gene 0: AMY2A\n", + "Processing gene 100: PTGDS\n", + "Processing gene 200: EGFL7\n", + "Processing gene 300: ESR1\n", + "Processing gene 400: NegControlCodeword_0523\n", + "Processing gene 500: UnassignedCodeword_0459\n", + "All gene-specific parquet files are succesfully saved.\n", "\n", "========Create clusters and meta clusters files========\n", "Cell clusters and meta cluster files created successfully.\n", "\n", - "========Create cell boundary spatial tiles========\n" + "========Create cell boundary spatial tiles========\n", + "custom technology\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Processing coarse tiles: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:15<00:00, 1.77s/it]\n" + "Processing coarse tiles: 100%|███████████████████████████████████████████████████████████████████████████████████| 7/7 [00:24<00:00, 3.56s/it]\n" ] }, { @@ -227,8 +147,8 @@ } ], "source": [ - "path_landscape_files=\"data/xenium_landscape_files/Xenium_Prime_Human_Skin_FFPE_outs\"\n", - "path_segmentation_files=\"data/processed_data/xenium_skin/cellpose2/\"\n", + "path_landscape_files=\"data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_test/\"\n", + "path_segmentation_files=\"data/processed_data/xenium_pancreas/proseg/\"\n", "\n", "dega.pre.add_custom_segmentation(path_landscape_files=path_landscape_files, \n", " path_segmentation_files=path_segmentation_files)" @@ -244,29 +164,22 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "7d440da2-1354-4653-ab09-bc24400f8833", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Server running on port 55358\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "767f649f38764b20815f31721dc9be66", + "model_id": "60e4ec5c3c0e402f86228e2b4a6cf8a9", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Landscape(base_url='http://localhost:55358/data/xenium_landscape_files/Xenium_Prime_Human_Skin_FFPE_outs', seg…" + "Landscape(base_url='http://localhost:52696/data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_test/', cel…" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -275,9 +188,8 @@ "server_address = dega.viz.get_local_server()\n", "\n", "landscape_ist = dega.viz.Landscape(\n", - " technology='Xenium',\n", " base_url = f\"http://localhost:{server_address}/{path_landscape_files}\",\n", - " segmentation='cellpose2'\n", + " segmentation='proseg'\n", ")\n", "\n", "landscape_ist" @@ -308,7 +220,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.20" + "version": "3.11.5" }, "widgets": { "application/vnd.jupyter.widget-state+json": { From 2d548f616767da5624b67fce501cf9798fa23c2c Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Fri, 25 Jul 2025 13:54:34 -0400 Subject: [PATCH 04/16] fixed viz related pytests --- src/celldega/viz/widget.py | 5 +---- tests/unit/test_viz/test_landscape_colors.py | 9 +++++++- tests/unit/test_viz/test_widget.py | 23 ++++++++++++++------ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index efd3f1aa..658121ae 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -7,7 +7,6 @@ from copy import deepcopy import json from pathlib import Path -import urllib.error import warnings import anywidget @@ -18,7 +17,7 @@ import scanpy as sc from shapely.affinity import affine_transform import traitlets - +import urllib.request _clustergram_registry = {} # maps names to widget instances _enrich_registry = {} # maps names to widget instances @@ -106,9 +105,7 @@ def __init__(self, **kwargs): if "technology" not in kwargs: path_parameters_json = base_path + "landscape_parameters.json" - try: - import urllib.request with urllib.request.urlopen(path_parameters_json) as f: landscape_parameters = json.load(f) diff --git a/tests/unit/test_viz/test_landscape_colors.py b/tests/unit/test_viz/test_landscape_colors.py index 27e053ca..0f0d8078 100644 --- a/tests/unit/test_viz/test_landscape_colors.py +++ b/tests/unit/test_viz/test_landscape_colors.py @@ -1,6 +1,9 @@ import numpy as np import pandas as pd import pytest +import io +import json +from unittest.mock import patch try: import anndata as ad @@ -8,8 +11,12 @@ except Exception as e: # pragma: no cover - if deps missing skip pytest.skip(f"celldega modules unavailable: {e}", allow_module_level=True) +def mock_urlopen_success(*args, **kwargs): + fake_json = json.dumps({"technology": "sst"}).encode("utf-8") + return io.BytesIO(fake_json) -def test_leiden_colors_added_if_missing() -> None: +@patch("celldega.viz.widget.urllib.request.urlopen", side_effect=mock_urlopen_success) +def test_leiden_colors_added_if_missing(mock_urlopen) -> None: adata = ad.AnnData(np.zeros((3, 3))) adata.obs["leiden"] = pd.Categorical(["0", "1", "0"]) adata.uns.pop("leiden_colors", None) diff --git a/tests/unit/test_viz/test_widget.py b/tests/unit/test_viz/test_widget.py index 5c68584e..38ab364c 100644 --- a/tests/unit/test_viz/test_widget.py +++ b/tests/unit/test_viz/test_widget.py @@ -1,11 +1,11 @@ -"""Tests for Clustergram widget with Parquet input.""" +"""Tests for Clustergram and Landscape widgets with Parquet input.""" +import io import json - import numpy as np import pandas as pd import pytest - +from unittest.mock import patch try: import geopandas as gpd @@ -40,8 +40,8 @@ def test_export_viz_parquet_returns_bytes() -> None: assert set(pq) == expected_keys for key in expected_keys - {"meta"}: - assert isinstance(pq[key], bytes | bytearray) - assert pq[key] # non-empty + assert isinstance(pq[key], (bytes, bytearray)) + assert pq[key] # ensure non-empty assert isinstance(pq["meta"], dict) @@ -79,7 +79,16 @@ def test_clustergram_selected_genes_trait() -> None: assert widget.selected_genes == ["A", "B"] -def test_landscape_nbhd_geojson_and_metadata() -> None: +# ---------- Landscape Patch and Test ---------- + +def mock_urlopen_success(*args, **kwargs): + """Mock function to simulate reading landscape_parameters.json.""" + fake_json = json.dumps({"technology": "sst"}).encode("utf-8") + return io.BytesIO(fake_json) + + +@patch("celldega.viz.widget.urllib.request.urlopen", side_effect=mock_urlopen_success) +def test_landscape_nbhd_geojson_and_metadata(mock_urlopen) -> None: gdf = gpd.GeoDataFrame( {"name": ["a"], "cat": ["x"]}, geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])], @@ -88,7 +97,7 @@ def test_landscape_nbhd_geojson_and_metadata() -> None: widget = Landscape(nbhd=gdf, meta_nbhd=meta_nbhd) - # drop geometry_pixel column from gdf + # Drop geometry_pixel for comparison gdf = gdf.drop(columns=["geometry_pixel"], errors="ignore") assert widget.nbhd_geojson == json.loads(gdf.to_json()) From a2746d48878f61351e45cd56cb7ec200aaff128b Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Fri, 25 Jul 2025 14:23:56 -0400 Subject: [PATCH 05/16] ruff + moving reading technology from back end to front end --- js/widget.js | 37 +++++++++++++++++++++++++- src/celldega/pre/__init__.py | 10 +++---- src/celldega/pre/run_pre_processing.py | 4 +-- src/celldega/viz/widget.py | 20 +++----------- 4 files changed, 46 insertions(+), 25 deletions(-) diff --git a/js/widget.js b/js/widget.js index f66b8d94..9c24184a 100644 --- a/js/widget.js +++ b/js/widget.js @@ -124,8 +124,41 @@ const render_landscape_h_e = async ({ model, el }) => { ); }; +const fetchTechnology = async (base_url) => { + const path_parameters_json = `${base_url || ''}/landscape_parameters.json`; + + console.warn( + "'technology' will be removed from the Python API in a future release. " + + "Please update your code to rely on landscape_parameters.json instead." + ); + + try { + const response = await fetch(path_parameters_json); + if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`); + const landscape_parameters = await response.json(); + return landscape_parameters.technology || 'sst'; + } catch (e) { + console.warn( + `Could not read technology from ${path_parameters_json}. Using default 'sst'. Reason: ${e}` + ); + return 'sst'; + } +}; + const render_landscape = async ({ model, el }) => { - const technology = model.get('technology'); + let technology; + + try { + technology = model.get('technology'); + } catch (e) { + console.warn("No 'technology' key found on widget model. Will try to fetch it."); + } + + if (!technology) { + const base_url = model.get('base_url'); + technology = await fetchTechnology(base_url); + model.set('technology', technology); + } if (['MERSCOPE', 'Xenium'].includes(technology)) { return render_landscape_ist({ model, el }); @@ -133,6 +166,8 @@ const render_landscape = async ({ model, el }) => { return render_landscape_sst({ model, el }); } else if (['h&e'].includes(technology)) { return render_landscape_h_e({ model, el }); + } else { + console.warn(`Unknown technology "${technology}". Rendering skipped.`); } }; diff --git a/src/celldega/pre/__init__.py b/src/celldega/pre/__init__.py index 47206285..0c38bb84 100644 --- a/src/celldega/pre/__init__.py +++ b/src/celldega/pre/__init__.py @@ -689,13 +689,11 @@ def _align_and_deduplicate_genes(cbg_custom: pd.DataFrame, path_landscape_files: # Optional: check for duplicates in meta_gene index duplicated_genes = meta_gene.index[meta_gene.index.duplicated()].unique() - if not duplicated_genes.empty: + if len(duplicated_genes) > 0: print(f"Warning: Duplicate genes found in meta_gene index: {list(duplicated_genes)}") # Remove duplicate columns from cbg_custom - cbg_custom = cbg_custom.loc[:, ~cbg_custom.columns.duplicated()] - - return cbg_custom + return cbg_custom.loc[:, ~cbg_custom.columns.duplicated()] def make_meta_cell_image_coord( technology, @@ -880,7 +878,7 @@ def save_landscape_parameters( "tile_size": "N.A.", "image_info": image_info, "image_format": image_format, - "image_dimensions": {'width': image_width, 'height': image_height}, + "image_dimensions": {"width": image_width, "height": image_height}, "use_int_index": "N.A.", } elif technology != "custom": @@ -891,7 +889,7 @@ def save_landscape_parameters( "tile_size": tile_size, "image_info": image_info, "image_format": image_format, - "image_dimensions": {'width': image_width, 'height': image_height}, + "image_dimensions": {"width": image_width, "height": image_height}, "use_int_index": use_int_index, } else: diff --git a/src/celldega/pre/run_pre_processing.py b/src/celldega/pre/run_pre_processing.py index 37076f56..b0fbe5f5 100644 --- a/src/celldega/pre/run_pre_processing.py +++ b/src/celldega/pre/run_pre_processing.py @@ -251,8 +251,8 @@ def main( dega.pre.save_landscape_parameters( technology, path_landscape_files, - image_width=tile_bounds['x_max'], - image_height=tile_bounds['y_max'], + image_width=tile_bounds["x_max"], + image_height=tile_bounds["y_max"], image_name="dapi_files", tile_size=tile_size, image_info=dega.pre.get_image_info(technology, image_tile_layer), diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 658121ae..ce63d4f6 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -7,6 +7,8 @@ from copy import deepcopy import json from pathlib import Path +import urllib.request +import urllib.error import warnings import anywidget @@ -17,7 +19,7 @@ import scanpy as sc from shapely.affinity import affine_transform import traitlets -import urllib.request + _clustergram_registry = {} # maps names to widget instances _enrich_registry = {} # maps names to widget instances @@ -68,7 +70,7 @@ class Landscape(anywidget.AnyWidget): _css = Path(__file__).parent / "../static" / "widget.css" component = traitlets.Unicode("Landscape").tag(sync=True) - technology = traitlets.Unicode("sst").tag(sync=True) + technology = traitlets.Unicode(None, allow_none=True).tag(sync=True) base_url = traitlets.Unicode("").tag(sync=True) token = traitlets.Unicode("").tag(sync=True) creds = traitlets.Dict({}).tag(sync=True) @@ -103,20 +105,6 @@ def __init__(self, **kwargs): base_path = (kwargs.get("base_url") or "") + "/" - if "technology" not in kwargs: - path_parameters_json = base_path + "landscape_parameters.json" - try: - with urllib.request.urlopen(path_parameters_json) as f: - landscape_parameters = json.load(f) - - kwargs["technology"] = landscape_parameters.get("technology", "sst") - except (FileNotFoundError, urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, TypeError) as e: - kwargs["technology"] = "sst" - warnings.warn( - f"Could not read technology from {path_parameters_json}. Using default 'sst'. Reason: {e}", - stacklevel=2, - ) - adata = kwargs.pop("adata", None) or kwargs.pop("AnnData", None) pq_meta_cell = kwargs.pop("meta_cell_parquet", None) pq_meta_cluster = kwargs.pop("meta_cluster_parquet", None) From 5e1ef293f01764e8e4b8dcdf8b3809cb79a0d739 Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Fri, 25 Jul 2025 14:33:20 -0400 Subject: [PATCH 06/16] copilot suggested changes --- pyproject.toml | 1 + src/celldega/viz/widget.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index da8c71ff..890e97ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ build-backend = "hatchling.build" name = "celldega" version = "0.13.0a9" readme = "README.md" +requires-python = ">=3.8" dependencies = [ "anndata~=0.11.0", "anywidget~=0.9.18", diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index ce63d4f6..e8ae2371 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -7,8 +7,8 @@ from copy import deepcopy import json from pathlib import Path -import urllib.request import urllib.error +import urllib.request import warnings import anywidget From 955a8124cabf7ac9795fb5f3553696517f21da4d Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Fri, 25 Jul 2025 14:38:02 -0400 Subject: [PATCH 07/16] ruff formatting --- src/celldega/pre/__init__.py | 6 +++++- src/celldega/viz/widget.py | 1 - 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/celldega/pre/__init__.py b/src/celldega/pre/__init__.py index 0c38bb84..daef681d 100644 --- a/src/celldega/pre/__init__.py +++ b/src/celldega/pre/__init__.py @@ -662,7 +662,10 @@ def _load_meta_cell_by_technology(technology, path_meta_cell_micron): return meta_cell -def _align_and_deduplicate_genes(cbg_custom: pd.DataFrame, path_landscape_files: str) -> pd.DataFrame: + +def _align_and_deduplicate_genes( + cbg_custom: pd.DataFrame, path_landscape_files: str +) -> pd.DataFrame: """ Ensures all genes from meta_gene.parquet are present in cbg_custom DataFrame. Adds missing genes with value 0 and removes duplicate columns (keeps the first occurrence). @@ -695,6 +698,7 @@ def _align_and_deduplicate_genes(cbg_custom: pd.DataFrame, path_landscape_files: # Remove duplicate columns from cbg_custom return cbg_custom.loc[:, ~cbg_custom.columns.duplicated()] + def make_meta_cell_image_coord( technology, path_transformation_matrix, diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index e8ae2371..a5cae154 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -102,7 +102,6 @@ class Landscape(anywidget.AnyWidget): height = traitlets.Int(800).tag(sync=True) def __init__(self, **kwargs): - base_path = (kwargs.get("base_url") or "") + "/" adata = kwargs.pop("adata", None) or kwargs.pop("AnnData", None) From 5b58891b8ecc4d971c5aff0ce40c1aa1fb42bca5 Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Fri, 25 Jul 2025 14:44:03 -0400 Subject: [PATCH 08/16] linting fix --- js/widget.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/js/widget.js b/js/widget.js index 9c24184a..99e77585 100644 --- a/js/widget.js +++ b/js/widget.js @@ -150,7 +150,8 @@ const render_landscape = async ({ model, el }) => { try { technology = model.get('technology'); - } catch (e) { + } catch { + // eslint-disable-next-line no-console console.warn("No 'technology' key found on widget model. Will try to fetch it."); } @@ -167,6 +168,7 @@ const render_landscape = async ({ model, el }) => { } else if (['h&e'].includes(technology)) { return render_landscape_h_e({ model, el }); } else { + // eslint-disable-next-line no-console console.warn(`Unknown technology "${technology}". Rendering skipped.`); } }; From 7f09c232761d636c40cd03f7fb67b9a97b37218e Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Fri, 25 Jul 2025 14:49:10 -0400 Subject: [PATCH 09/16] prettier formatting --- js/widget.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/js/widget.js b/js/widget.js index 99e77585..b514d2f6 100644 --- a/js/widget.js +++ b/js/widget.js @@ -129,7 +129,7 @@ const fetchTechnology = async (base_url) => { console.warn( "'technology' will be removed from the Python API in a future release. " + - "Please update your code to rely on landscape_parameters.json instead." + 'Please update your code to rely on landscape_parameters.json instead.' ); try { @@ -152,7 +152,9 @@ const render_landscape = async ({ model, el }) => { technology = model.get('technology'); } catch { // eslint-disable-next-line no-console - console.warn("No 'technology' key found on widget model. Will try to fetch it."); + console.warn( + "No 'technology' key found on widget model. Will try to fetch it." + ); } if (!technology) { From 514e5a9a01c94e70113d9b22daec4b8c7f6cbdec Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Fri, 25 Jul 2025 16:35:17 -0400 Subject: [PATCH 10/16] adjusted defaults for images --- notebooks/Pre-processor_xenium_merscope.ipynb | 92 +++++++++++++++---- src/celldega/pre/run_pre_processing.py | 2 +- 2 files changed, 74 insertions(+), 20 deletions(-) diff --git a/notebooks/Pre-processor_xenium_merscope.ipynb b/notebooks/Pre-processor_xenium_merscope.ipynb index f0ad4ae9..b92ff8d6 100644 --- a/notebooks/Pre-processor_xenium_merscope.ipynb +++ b/notebooks/Pre-processor_xenium_merscope.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "b81ab32e", "metadata": {}, "outputs": [ @@ -18,10 +18,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n", "env: ANYWIDGET_HMR=1\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/feni/Documents/celldega/dega/lib/python3.12/site-packages/h5py/__init__.py:36: UserWarning: h5py is running against HDF5 1.14.5 when it was built against 1.14.6, this may cause problems\n", + " _warn((\"h5py is running against HDF5 {0} when it was built against {1}, \"\n" + ] } ], "source": [ @@ -42,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "13350680", "metadata": {}, "outputs": [ @@ -51,13 +57,12 @@ "output_type": "stream", "text": [ "Starting preprocessing for sample: Xenium_V1_human_Pancreas_FFPE_outs\n", - "Created directory: data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_test\n", "\n", "========Unzip and extract Xenium-related files========\n", "All files have been successfully extracted or skipped.\n", "\n", "========Write xenium transform file from the Zarr folder========\n", - "Transformation matrix saved to 'data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_test/micron_to_image_transform.csv'.\n", + "Transformation matrix saved to 'data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_2025-07-25/micron_to_image_transform.csv'.\n", "\n", "========Check if all required files or directories exist========\n", "All required files or directories for technology 'Xenium' are present in 'data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs'.\n", @@ -87,7 +92,7 @@ "Calculating mean expression\n", "Calculating variance\n", "All meta gene files are succesfully saved.\n", - "data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_test/cbg\n", + "data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_2025-07-25/cbg\n", "Processing gene 0: ABCC11\n", "Processing gene 100: CLECL1\n", "Processing gene 200: IL1RL1\n", @@ -113,27 +118,76 @@ "Cell clusters and meta cluster files created successfully.\n", "\n", "========Generating image tiles========\n", - "------ xenium\n" + "------ xenium\n", + "generating dapi image tiles ...\n", + "generating bound image tiles ...\n", + "generating rna image tiles ...\n", + "generating prot image tiles ...\n", + "Image tiles created successfully.\n", + "\n", + "========Generating transcript tiles========\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing chunks: 100%|███████████████████████████████████████| 81/81 [00:00<00:00, 957.99it/s]\n", + "Processing coarse tiles: 84tile [00:17, 4.86tile/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tile bounds: {'x_min': 0, 'x_max': 34126.65, 'y_min': 0, 'y_max': 13744.4}\n", + "\n", + "========Generating boundary tiles========\n", + "\n", + "========Create cell boundary spatial tiles========\n", + "technology Xenium\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing coarse tiles: 100%|██████████████████████████████████| 14/14 [00:15<00:00, 1.10s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done.\n", + "\n", + "========Save landscape parameters========\n", + "Done.\n", + "Preprocessing completed successfully.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" ] } ], "source": [ "sample = 'Xenium_V1_human_Pancreas_FFPE_outs'\n", "data_dir = f'data/xenium_data/'\n", - "path_landscape_files=f'data/landscape_files/{sample}_test'\n", + "path_landscape_files=f'data/landscape_files/{sample}_2025-07-25'\n", "\n", "tile_size=250\n", - "image_tile_layer='dapi'\n", - "\n", "\n", "dega.pre.main(\n", " sample=sample,\n", " data_root_dir=data_dir,\n", " tile_size=tile_size,\n", - " image_tile_layer=image_tile_layer,\n", " path_landscape_files=path_landscape_files,\n", " use_int_index=True,\n", - " )" + ")" ] }, { @@ -146,22 +200,22 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "4a1320eb", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2094d99ad409483cb5f74bff528fc511", + "model_id": "028f5dba914140068c6c1cce5bbcfc64", "version_major": 2, - "version_minor": 0 + "version_minor": 1 }, "text/plain": [ - "Landscape(base_url='http://localhost:50952/data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_test', cell…" + "Landscape(base_url='http://localhost:58093/data/landscape_files/Xenium_V1_human_Pancreas_FFPE_outs_2025-07-25'…" ] }, - "execution_count": 6, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -354,7 +408,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.12.7" }, "toc": { "base_numbering": 1, diff --git a/src/celldega/pre/run_pre_processing.py b/src/celldega/pre/run_pre_processing.py index b0fbe5f5..c3ce5d65 100644 --- a/src/celldega/pre/run_pre_processing.py +++ b/src/celldega/pre/run_pre_processing.py @@ -112,8 +112,8 @@ def main( sample, data_root_dir, tile_size, - image_tile_layer, path_landscape_files, + image_tile_layer='all', use_int_index=True, max_workers=1, ): From 5cbecd89d2530cfea748ff7cc741046405af639b Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Mon, 28 Jul 2025 11:30:58 -0400 Subject: [PATCH 11/16] review changes - deduplication of genes, console log switch to errors --- js/widget.js | 15 +++++------ src/celldega/pre/__init__.py | 49 ++++++++++++++++++++++++++---------- src/celldega/viz/widget.py | 1 - 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/js/widget.js b/js/widget.js index b514d2f6..47dfe4cd 100644 --- a/js/widget.js +++ b/js/widget.js @@ -136,12 +136,12 @@ const fetchTechnology = async (base_url) => { const response = await fetch(path_parameters_json); if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`); const landscape_parameters = await response.json(); - return landscape_parameters.technology || 'sst'; - } catch (e) { - console.warn( - `Could not read technology from ${path_parameters_json}. Using default 'sst'. Reason: ${e}` + return landscape_parameters.technology || 'Xenium'; + } catch (err) { + console.error(err); + throw new Error( + `Could not read technology from ${path_parameters_json}. Using default 'Xenium'` ); - return 'sst'; } }; @@ -151,10 +151,7 @@ const render_landscape = async ({ model, el }) => { try { technology = model.get('technology'); } catch { - // eslint-disable-next-line no-console - console.warn( - "No 'technology' key found on widget model. Will try to fetch it." - ); + technology = null; // fallback to fetching from JSON } if (!technology) { diff --git a/src/celldega/pre/__init__.py b/src/celldega/pre/__init__.py index daef681d..2e9f5d78 100644 --- a/src/celldega/pre/__init__.py +++ b/src/celldega/pre/__init__.py @@ -662,13 +662,29 @@ def _load_meta_cell_by_technology(technology, path_meta_cell_micron): return meta_cell +def _make_names_unique(index: pd.Index) -> pd.Index: + """ + Mimics AnnData.var_names_make_unique() behavior: + Appends '-1', '-2', etc., to duplicate entries. + """ + seen = {} + unique_names = [] + for name in index: + if name not in seen: + seen[name] = 0 + unique_names.append(name) + else: + seen[name] += 1 + unique_names.append(f"{name}-{seen[name]}") + return pd.Index(unique_names) def _align_and_deduplicate_genes( cbg_custom: pd.DataFrame, path_landscape_files: str ) -> pd.DataFrame: """ - Ensures all genes from meta_gene.parquet are present in cbg_custom DataFrame. - Adds missing genes with value 0 and removes duplicate columns (keeps the first occurrence). + Ensures genes in cbg_custom and meta_gene are identical, + and makes duplicate gene names unique by suffixing. + Raises an error if there is a mismatch in gene sets. Parameters: ----------- @@ -680,23 +696,30 @@ def _align_and_deduplicate_genes( Returns: -------- pd.DataFrame - Cleaned cbg_custom DataFrame with all expected genes and no duplicate columns. + cbg_custom with deduplicated gene names (columns). """ meta_gene_path = Path(path_landscape_files) / "meta_gene.parquet" meta_gene = pd.read_parquet(meta_gene_path) - # Add missing gene columns with default value 0 - missing_cols = meta_gene.index.difference(cbg_custom.columns) - for col in missing_cols: - cbg_custom[col] = 0 + # Compare unordered gene sets before deduplication + genes_meta = set(meta_gene.index) + genes_cbg = set(cbg_custom.columns) + + if genes_meta != genes_cbg: + missing_in_cbg = genes_meta - genes_cbg + missing_in_meta = genes_cbg - genes_meta + raise ValueError( + f"Mismatch between cbg_custom and meta_gene genes.\n" + f"Missing in cbg_custom: {missing_in_cbg}\n" + f"Missing in meta_gene: {missing_in_meta}" + ) - # Optional: check for duplicates in meta_gene index - duplicated_genes = meta_gene.index[meta_gene.index.duplicated()].unique() - if len(duplicated_genes) > 0: - print(f"Warning: Duplicate genes found in meta_gene index: {list(duplicated_genes)}") + # Make gene names unique consistently across both + cbg_custom.columns = _make_names_unique(pd.Index(cbg_custom.columns)) + meta_gene.index = _make_names_unique(pd.Index(meta_gene.index)) - # Remove duplicate columns from cbg_custom - return cbg_custom.loc[:, ~cbg_custom.columns.duplicated()] + # Align column order to meta_gene index order + return cbg_custom.loc[:, meta_gene.index] def make_meta_cell_image_coord( diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index a5cae154..87fcac25 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -8,7 +8,6 @@ import json from pathlib import Path import urllib.error -import urllib.request import warnings import anywidget From 991b8c6a4e17a1590f12e421bc9d451c55a8fba7 Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Mon, 28 Jul 2025 11:42:07 -0400 Subject: [PATCH 12/16] adding error messages in python --- js/widget.js | 25 ++++++++++++++----------- src/celldega/pre/run_pre_processing.py | 2 +- src/celldega/viz/widget.py | 8 ++++++++ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/js/widget.js b/js/widget.js index 47dfe4cd..ed4cd2f6 100644 --- a/js/widget.js +++ b/js/widget.js @@ -124,24 +124,27 @@ const render_landscape_h_e = async ({ model, el }) => { ); }; -const fetchTechnology = async (base_url) => { +const fetchTechnology = async (base_url, model) => { const path_parameters_json = `${base_url || ''}/landscape_parameters.json`; - console.warn( - "'technology' will be removed from the Python API in a future release. " + - 'Please update your code to rely on landscape_parameters.json instead.' - ); - try { const response = await fetch(path_parameters_json); if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`); const landscape_parameters = await response.json(); return landscape_parameters.technology || 'Xenium'; } catch (err) { - console.error(err); - throw new Error( - `Could not read technology from ${path_parameters_json}. Using default 'Xenium'` - ); + const msg = `Could not read technology from ${path_parameters_json}. Using default 'Xenium'`; + + console.warn(msg, err); + + // Send warning to backend + model.send({ + event: 'technology_fetch_warning', + message: msg, + error: err.message, + }); + + return 'Xenium'; } }; @@ -156,7 +159,7 @@ const render_landscape = async ({ model, el }) => { if (!technology) { const base_url = model.get('base_url'); - technology = await fetchTechnology(base_url); + technology = await fetchTechnology(base_url, model); model.set('technology', technology); } diff --git a/src/celldega/pre/run_pre_processing.py b/src/celldega/pre/run_pre_processing.py index c3ce5d65..e23ebfb0 100644 --- a/src/celldega/pre/run_pre_processing.py +++ b/src/celldega/pre/run_pre_processing.py @@ -113,7 +113,7 @@ def main( data_root_dir, tile_size, path_landscape_files, - image_tile_layer='all', + image_tile_layer="all", use_int_index=True, max_workers=1, ): diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 87fcac25..f427fd4c 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -207,6 +207,14 @@ def _df_to_bytes(df): super().__init__(**kwargs) + def _handle_frontend_msg(_, content, buffers): + if content.get("event") == "technology_fetch_warning": + msg = content.get("message", "Technology fetch warning from frontend.") + err = content.get("error", "") + warnings.warn(f"{msg} Frontend error: {err}", stacklevel=2) + + self.on_msg(_handle_frontend_msg) + # store DataFrames locally without syncing to the frontend self.meta_cell = meta_cell_df self.meta_nbhd = meta_nbhd_df From 88093754ea79e5246fa412ee87e8dd4db00b6ebe Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Mon, 28 Jul 2025 11:59:37 -0400 Subject: [PATCH 13/16] more fixes --- js/widget.js | 14 +++++++------- src/celldega/pre/__init__.py | 2 ++ src/celldega/viz/widget.py | 7 +++++-- tests/unit/test_viz/test_landscape_colors.py | 2 +- tests/unit/test_viz/test_widget.py | 2 +- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/js/widget.js b/js/widget.js index ed4cd2f6..f92108c9 100644 --- a/js/widget.js +++ b/js/widget.js @@ -134,15 +134,15 @@ const fetchTechnology = async (base_url, model) => { return landscape_parameters.technology || 'Xenium'; } catch (err) { const msg = `Could not read technology from ${path_parameters_json}. Using default 'Xenium'`; - console.warn(msg, err); - // Send warning to backend - model.send({ - event: 'technology_fetch_warning', - message: msg, - error: err.message, - }); + if (model?.send) { + model.send({ + event: 'technology_fetch_warning', + message: msg, + error: err.message || '', + }); + } return 'Xenium'; } diff --git a/src/celldega/pre/__init__.py b/src/celldega/pre/__init__.py index 2e9f5d78..570db031 100644 --- a/src/celldega/pre/__init__.py +++ b/src/celldega/pre/__init__.py @@ -662,6 +662,7 @@ def _load_meta_cell_by_technology(technology, path_meta_cell_micron): return meta_cell + def _make_names_unique(index: pd.Index) -> pd.Index: """ Mimics AnnData.var_names_make_unique() behavior: @@ -678,6 +679,7 @@ def _make_names_unique(index: pd.Index) -> pd.Index: unique_names.append(f"{name}-{seen[name]}") return pd.Index(unique_names) + def _align_and_deduplicate_genes( cbg_custom: pd.DataFrame, path_landscape_files: str ) -> pd.DataFrame: diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index f427fd4c..c8bb7284 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -20,6 +20,9 @@ import traitlets +warnings.simplefilter("always") + + _clustergram_registry = {} # maps names to widget instances _enrich_registry = {} # maps names to widget instances @@ -207,11 +210,11 @@ def _df_to_bytes(df): super().__init__(**kwargs) - def _handle_frontend_msg(_, content, buffers): + def _handle_frontend_msg(widget, content, buffers): if content.get("event") == "technology_fetch_warning": msg = content.get("message", "Technology fetch warning from frontend.") err = content.get("error", "") - warnings.warn(f"{msg} Frontend error: {err}", stacklevel=2) + warnings.warn(f"{msg}\nFrontend error: {err}", stacklevel=2) self.on_msg(_handle_frontend_msg) diff --git a/tests/unit/test_viz/test_landscape_colors.py b/tests/unit/test_viz/test_landscape_colors.py index 0f0d8078..5dc3ff96 100644 --- a/tests/unit/test_viz/test_landscape_colors.py +++ b/tests/unit/test_viz/test_landscape_colors.py @@ -12,7 +12,7 @@ pytest.skip(f"celldega modules unavailable: {e}", allow_module_level=True) def mock_urlopen_success(*args, **kwargs): - fake_json = json.dumps({"technology": "sst"}).encode("utf-8") + fake_json = json.dumps({"technology": "Xenium"}).encode("utf-8") return io.BytesIO(fake_json) @patch("celldega.viz.widget.urllib.request.urlopen", side_effect=mock_urlopen_success) diff --git a/tests/unit/test_viz/test_widget.py b/tests/unit/test_viz/test_widget.py index 38ab364c..cc8bd31d 100644 --- a/tests/unit/test_viz/test_widget.py +++ b/tests/unit/test_viz/test_widget.py @@ -83,7 +83,7 @@ def test_clustergram_selected_genes_trait() -> None: def mock_urlopen_success(*args, **kwargs): """Mock function to simulate reading landscape_parameters.json.""" - fake_json = json.dumps({"technology": "sst"}).encode("utf-8") + fake_json = json.dumps({"technology": "Xenium"}).encode("utf-8") return io.BytesIO(fake_json) From 57fa8ef57a40d0eb1dfa3f557e4341292c955054 Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Mon, 28 Jul 2025 16:07:24 -0400 Subject: [PATCH 14/16] copilot comments --- js/widget.js | 8 +++++--- src/celldega/pre/__init__.py | 16 ++++++++++++---- src/celldega/viz/widget.py | 1 - 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/js/widget.js b/js/widget.js index f92108c9..d137d93a 100644 --- a/js/widget.js +++ b/js/widget.js @@ -12,6 +12,8 @@ import { landscape_sst } from './viz/landscape_sst'; import { matrix_viz } from './viz/matrix_viz'; import { render_enrich } from './widgets/enrich_widget'; +const DEFAULT_TECHNOLOGY = 'Xenium'; + // Remove export keywords from render functions const render_landscape_ist = async ({ model, el }) => { const token = model.get('token'); @@ -131,9 +133,9 @@ const fetchTechnology = async (base_url, model) => { const response = await fetch(path_parameters_json); if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`); const landscape_parameters = await response.json(); - return landscape_parameters.technology || 'Xenium'; + return landscape_parameters.technology || DEFAULT_TECHNOLOGY; } catch (err) { - const msg = `Could not read technology from ${path_parameters_json}. Using default 'Xenium'`; + const msg = `Could not read technology from ${path_parameters_json}. Using default ${DEFAULT_TECHNOLOGY}`; console.warn(msg, err); if (model?.send) { @@ -144,7 +146,7 @@ const fetchTechnology = async (base_url, model) => { }); } - return 'Xenium'; + return DEFAULT_TECHNOLOGY; } }; diff --git a/src/celldega/pre/__init__.py b/src/celldega/pre/__init__.py index 570db031..4fbdbe03 100644 --- a/src/celldega/pre/__init__.py +++ b/src/celldega/pre/__init__.py @@ -665,8 +665,16 @@ def _load_meta_cell_by_technology(technology, path_meta_cell_micron): def _make_names_unique(index: pd.Index) -> pd.Index: """ - Mimics AnnData.var_names_make_unique() behavior: - Appends '-1', '-2', etc., to duplicate entries. + Ensure uniqueness of values in a pandas Index by appending suffixes to duplicates. + + For each duplicated entry, appends a hyphen and a count (e.g., 'name', 'name-1', 'name-2', ...). + The first occurrence of each name is left unchanged. + + Parameters: + index (pd.Index): A pandas Index potentially containing duplicate values. + + Returns: + pd.Index: A new Index with all values made unique. """ seen = {} unique_names = [] @@ -712,8 +720,8 @@ def _align_and_deduplicate_genes( missing_in_meta = genes_cbg - genes_meta raise ValueError( f"Mismatch between cbg_custom and meta_gene genes.\n" - f"Missing in cbg_custom: {missing_in_cbg}\n" - f"Missing in meta_gene: {missing_in_meta}" + f"Missing in cbg_custom (up to 20): {list(missing_in_cbg)[:20]}\n" + f"Missing in meta_gene (up to 20): {list(missing_in_meta)[:20]}" ) # Make gene names unique consistently across both diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index c8bb7284..89c6a4b3 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -22,7 +22,6 @@ warnings.simplefilter("always") - _clustergram_registry = {} # maps names to widget instances _enrich_registry = {} # maps names to widget instances From 6b05d3cc7489711dd34e6173bbc0e32fb024c700 Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Wed, 30 Jul 2025 07:11:45 -0400 Subject: [PATCH 15/16] fix issues related to celldega version + ruff + linting fixes --- js/widget.js | 157 ++++++++++++++----- src/celldega/viz/widget.py | 36 +++-- tests/unit/test_enrich/test_enrich_widget.py | 1 - tests/unit/test_pre/test_pre_tiles.py | 47 +++--- tests/unit/test_viz/test_landscape_colors.py | 17 +- tests/unit/test_viz/test_widget.py | 52 +++--- 6 files changed, 205 insertions(+), 105 deletions(-) diff --git a/js/widget.js b/js/widget.js index d137d93a..f95bede1 100644 --- a/js/widget.js +++ b/js/widget.js @@ -1,5 +1,6 @@ import './widget.css'; +import { options, set_options } from './global_variables/fetch_options'; import { networkFromParquet } from './read_parquet/network_from_parquet'; import { objects_from_parquet } from './read_parquet/objects_from_parquet'; import { @@ -12,7 +13,68 @@ import { landscape_sst } from './viz/landscape_sst'; import { matrix_viz } from './viz/matrix_viz'; import { render_enrich } from './widgets/enrich_widget'; -const DEFAULT_TECHNOLOGY = 'Xenium'; +function issueCrossPlatformWarning(message, model, el, showInNotebook = true) { + /* eslint-disable-next-line no-console */ + console.warn(`⚠️ ${message}`); + + if (showInNotebook) { + const warnDiv = document.createElement('div'); + warnDiv.style.color = 'orange'; + warnDiv.style.padding = '6px'; + warnDiv.style.fontSize = '0.9em'; + warnDiv.style.fontWeight = 'bold'; + warnDiv.textContent = `⚠️ ${message}`; + el.appendChild(warnDiv); + } + + if (model?.send) { + model.send({ event: 'js_warning', message }); + } +} + +const fetchLandscapeTechnology = async (model, _el) => { + const base_url = model.get('base_url'); + const token = model.get('token'); + + try { + set_options(token); + const url = `${base_url}/landscape_parameters.json`; + const response = await fetch(url, options.fetch); + + if (!response.ok) { + const error = new Error( + `Failed to fetch landscape_parameters.json: ${response.statusText}` + ); + error.status = response.status; + throw error; + } + + const json = await response.json(); + + if (!json.technology) { + const message = + 'The landscape_parameters.json file appears to be missing the `technology` field. Please verify its contents.'; + + /* eslint-disable-next-line no-console */ + console.warn(`⚠️ ${message}`); + model.send({ event: 'js_error', message }); + throw new Error(message); + } + + return json.technology; + } catch (error) { + const errorResult = handleAsyncError(error, { + context: 'fetchLandscapeTechnology', + messages: { + notFound: 'landscape_parameters.json not found', + unexpected: 'Error fetching landscape_parameters.json', + }, + }); + + model.send({ event: 'js_error', message: errorResult.message }); + return null; + } +}; // Remove export keywords from render functions const render_landscape_ist = async ({ model, el }) => { @@ -126,54 +188,61 @@ const render_landscape_h_e = async ({ model, el }) => { ); }; -const fetchTechnology = async (base_url, model) => { - const path_parameters_json = `${base_url || ''}/landscape_parameters.json`; +const DEFAULT_TECHNOLOGY = 'Xenium'; - try { - const response = await fetch(path_parameters_json); - if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`); - const landscape_parameters = await response.json(); - return landscape_parameters.technology || DEFAULT_TECHNOLOGY; - } catch (err) { - const msg = `Could not read technology from ${path_parameters_json}. Using default ${DEFAULT_TECHNOLOGY}`; - console.warn(msg, err); - - if (model?.send) { - model.send({ - event: 'technology_fetch_warning', - message: msg, - error: err.message || '', - }); - } +const render_landscape = async ({ model, el }) => { + let technology = model.get('technology'); + const userPassedTechnology = + Object.prototype.hasOwnProperty.call(model, 'attributes') && + Object.prototype.hasOwnProperty.call(model.attributes, 'technology'); - return DEFAULT_TECHNOLOGY; - } -}; + if (!technology) { + issueCrossPlatformWarning( + 'Technology was not passed in the function – attempting to fetch this from landscape_parameters.json.', + model, + el, + false + ); -const render_landscape = async ({ model, el }) => { - let technology; + const fetchedTech = await fetchLandscapeTechnology(model, el); - try { - technology = model.get('technology'); - } catch { - technology = null; // fallback to fetching from JSON - } + if (!fetchedTech) { + // Fallback to DEFAULT_TECHNOLOGY with a strong warning + const fallbackMsg = + `Neither technology was explicitly passed nor found in landscape_parameters.json. ` + + `Falling back to default: ${DEFAULT_TECHNOLOGY}`; + issueCrossPlatformWarning(fallbackMsg, model, el); + + technology = DEFAULT_TECHNOLOGY; + } else { + technology = fetchedTech; + } - if (!technology) { - const base_url = model.get('base_url'); - technology = await fetchTechnology(base_url, model); model.set('technology', technology); + model.save_changes(); + } else if (userPassedTechnology) { + issueCrossPlatformWarning( + 'Setting `technology` manually is deprecated and will be removed in a future release. Please rely on automatic detection via landscape_parameters.json.', + model, + el + ); + } + + if ( + !['MERSCOPE', DEFAULT_TECHNOLOGY, 'Visium-HD', 'h&e'].includes(technology) + ) { + const msg = `Unsupported technology: ${technology}`; + handleValidationWarning(msg); + model.send({ event: 'js_warning', message: msg }); + return; } - if (['MERSCOPE', 'Xenium'].includes(technology)) { + if (['MERSCOPE', DEFAULT_TECHNOLOGY].includes(technology)) { return render_landscape_ist({ model, el }); - } else if (['Visium-HD'].includes(technology)) { + } else if (technology === 'Visium-HD') { return render_landscape_sst({ model, el }); - } else if (['h&e'].includes(technology)) { + } else if (technology === 'h&e') { return render_landscape_h_e({ model, el }); - } else { - // eslint-disable-next-line no-console - console.warn(`Unknown technology "${technology}". Rendering skipped.`); } }; @@ -201,6 +270,18 @@ const render_matrix_new = async ({ model, el }) => { // Main render function - no export keyword async function render({ model, el }) { let cleanup = null; + model.on('msg:custom', (msg) => { + if (msg.event === 'py_warning') { + /* eslint-disable-next-line no-console */ + console.warn('[PYTHON WARNING]', msg.message); + el.innerHTML += `
⚠️ ${msg.message}
`; + } else if (msg.event === 'py_error') { + /* eslint-disable-next-line no-console */ + console.error('[PYTHON ERROR]', msg.message); + el.innerHTML += `
❌ ${msg.message}
`; + } + }); + try { const componentType = model.get('component'); diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 89c6a4b3..ad3e90a9 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -20,8 +20,6 @@ import traitlets -warnings.simplefilter("always") - _clustergram_registry = {} # maps names to widget instances _enrich_registry = {} # maps names to widget instances @@ -71,7 +69,7 @@ class Landscape(anywidget.AnyWidget): _css = Path(__file__).parent / "../static" / "widget.css" component = traitlets.Unicode("Landscape").tag(sync=True) - technology = traitlets.Unicode(None, allow_none=True).tag(sync=True) + technology = traitlets.Unicode("").tag(sync=True) base_url = traitlets.Unicode("").tag(sync=True) token = traitlets.Unicode("").tag(sync=True) creds = traitlets.Dict({}).tag(sync=True) @@ -103,7 +101,13 @@ class Landscape(anywidget.AnyWidget): height = traitlets.Int(800).tag(sync=True) def __init__(self, **kwargs): - base_path = (kwargs.get("base_url") or "") + "/" + technology_value = kwargs.get("technology", "") + if technology_value: + warnings.warn( + "[DEPRECATION WARNING] Passing `technology` manually to the Landscape widget is deprecated and will be removed in a future release. " + "Please rely on automatic detection via `landscape_parameters.json`.", + stacklevel=2, + ) adata = kwargs.pop("adata", None) or kwargs.pop("AnnData", None) pq_meta_cell = kwargs.pop("meta_cell_parquet", None) @@ -119,6 +123,8 @@ def __init__(self, **kwargs): meta_cluster_df = None cell_attr = kwargs.pop("cell_attr", ["leiden"]) + base_path = (kwargs.get("base_url") or "") + "/" + path_transformation_matrix = base_path + "micron_to_image_transform.csv" try: @@ -209,13 +215,8 @@ def _df_to_bytes(df): super().__init__(**kwargs) - def _handle_frontend_msg(widget, content, buffers): - if content.get("event") == "technology_fetch_warning": - msg = content.get("message", "Technology fetch warning from frontend.") - err = content.get("error", "") - warnings.warn(f"{msg}\nFrontend error: {err}", stacklevel=2) - - self.on_msg(_handle_frontend_msg) + # handle messages from the frontend for warnings/errors + self.on_msg(self._handle_frontend_message) # store DataFrames locally without syncing to the frontend self.meta_cell = meta_cell_df @@ -244,6 +245,19 @@ def _handle_frontend_msg(widget, content, buffers): self.nbhd_geojson = json.loads(gdf_viz.to_json()) + def _handle_frontend_message(self, _, content, buffers=None): + event = content.get("event") + message = content.get("message", "") + + if event == "js_warning": + print(f"JavaScript warning: {message}") + warnings.warn(message, stacklevel=2) + elif event == "js_error": + print(f"JavaScript error: {message}") + warnings.warn(f"JavaScript error: {message}", stacklevel=2) + else: + print(f"Unhandled frontend event: {event}") + # @traitlets.observe("nbhd") # def _on_nbhd_change(self, change): # new = change["new"] diff --git a/tests/unit/test_enrich/test_enrich_widget.py b/tests/unit/test_enrich/test_enrich_widget.py index 2e74a113..36ff331f 100644 --- a/tests/unit/test_enrich/test_enrich_widget.py +++ b/tests/unit/test_enrich/test_enrich_widget.py @@ -34,4 +34,3 @@ def test_enrich_traitlets_update() -> None: assert w.top_n_genes == 20 w.background_list = ["X", "Y"] assert w.background_list == ["X", "Y"] - diff --git a/tests/unit/test_pre/test_pre_tiles.py b/tests/unit/test_pre/test_pre_tiles.py index 36af7bb6..7778fa92 100644 --- a/tests/unit/test_pre/test_pre_tiles.py +++ b/tests/unit/test_pre/test_pre_tiles.py @@ -1,13 +1,14 @@ import importlib.util import math +from pathlib import Path import sys import types -from pathlib import Path import numpy as np import pandas as pd import pytest + try: import geopandas as gpd import polars as pl @@ -33,9 +34,7 @@ sys.modules["celldega.pre.boundary_tile"] = boundary_tile spec_b.loader.exec_module(boundary_tile) -spec_t = importlib.util.spec_from_file_location( - "celldega.pre.trx_tile", PRE_ROOT / "trx_tile.py" -) +spec_t = importlib.util.spec_from_file_location("celldega.pre.trx_tile", PRE_ROOT / "trx_tile.py") trx_tile = importlib.util.module_from_spec(spec_t) trx_tile.__package__ = "celldega.pre" sys.modules["celldega.pre.trx_tile"] = trx_tile @@ -50,6 +49,7 @@ TILE_SIZE = 250 BBOX = (0, 500, 0, 500) + def create_cell_polygon(df: pd.DataFrame) -> Polygon: """ Constructs a Shapely Polygon from a DataFrame containing 'vertex_x' and 'vertex_y' columns. @@ -76,16 +76,18 @@ def create_cell_polygon(df: pd.DataFrame) -> Polygon: if len(df) < 3: raise ValueError("At least three vertices are required to construct a polygon.") - return Polygon(zip(df["vertex_x"], df["vertex_y"])) + return Polygon(zip(df["vertex_x"], df["vertex_y"], strict=False)) + @pytest.fixture def make_synthetic_data(tmp_path): def _make(technology): return _generate_synthetic_data(tmp_path, technology) + return _make -def _generate_synthetic_data(tmp_path: Path, technology: str) -> dict[str, Path]: +def _generate_synthetic_data(tmp_path: Path, technology: str) -> dict[str, Path]: """ Generate synthetic spatial transcriptomics data for testing purposes. @@ -182,18 +184,15 @@ def _generate_synthetic_data(tmp_path: Path, technology: str) -> dict[str, Path] df_meta_cell = pd.DataFrame({"name": [f"cell_{i}" for i in range(N_CELLS)]}) df_meta_cell.to_parquet(tmp_path / "cell_metadata.parquet", index=False) - points = [ - (rng.uniform(BBOX[0], BBOX[1]), rng.uniform(BBOX[2], BBOX[3])) - for _ in range(N_TRX) - ] - genes = [f"G{i%3}" for i in range(N_TRX)] + points = [(rng.uniform(BBOX[0], BBOX[1]), rng.uniform(BBOX[2], BBOX[3])) for _ in range(N_TRX)] + genes = [f"G{i % 3}" for i in range(N_TRX)] if technology == "MERSCOPE": df_trx = pl.DataFrame( { "gene": genes, "global_x": [p[0] for p in points], "global_y": [p[1] for p in points], - "cell_id": [f"cell_{i%N_CELLS}" for i in range(N_TRX)], + "cell_id": [f"cell_{i % N_CELLS}" for i in range(N_TRX)], "transcript_id": list(range(N_TRX)), } ) @@ -203,7 +202,7 @@ def _generate_synthetic_data(tmp_path: Path, technology: str) -> dict[str, Path] "feature_name": genes, "x_location": [p[0] for p in points], "y_location": [p[1] for p in points], - "cell_id": [f"cell_{i%N_CELLS}" for i in range(N_TRX)], + "cell_id": [f"cell_{i % N_CELLS}" for i in range(N_TRX)], "transcript_id": list(range(N_TRX)), } ) @@ -290,9 +289,7 @@ def test_tiles(make_synthetic_data, technology) -> None: assert total_trx == N_TRX # Step 3: Ensure that every transcript maps to one of the generated transcript tile coordinates - produced_trx_tiles = { - tuple(map(int, p.stem.split("_")[-2:])) for p in trx_tile_files - } + produced_trx_tiles = {tuple(map(int, p.stem.split("_")[-2:])) for p in trx_tile_files} if technology == "MERSCOPE": df_trx = pl.read_parquet(paths["trx_path"]).to_pandas() @@ -303,7 +300,7 @@ def test_tiles(make_synthetic_data, technology) -> None: else: raise ValueError(f"Unsupported technology: {technology}") - for x, y in zip(df_trx[xcol], df_trx[ycol]): + for x, y in zip(df_trx[xcol], df_trx[ycol], strict=False): i = int((x - bounds["x_min"]) // TILE_SIZE) j = int((y - bounds["y_min"]) // TILE_SIZE) assert (i, j) in produced_trx_tiles @@ -348,10 +345,7 @@ def test_tiles(make_synthetic_data, technology) -> None: polygons = df_cells["Geometry"] elif technology == "Xenium": df_cells = pd.read_parquet(paths["boundaries_path"]) - polygons = ( - df_cells.groupby("cell_id")[["vertex_x", "vertex_y"]] - .apply(create_cell_polygon) - ) + polygons = df_cells.groupby("cell_id")[["vertex_x", "vertex_y"]].apply(create_cell_polygon) else: raise ValueError(f"Unsupported technology: {technology}") @@ -363,15 +357,10 @@ def test_tiles(make_synthetic_data, technology) -> None: assert len(all_cells) >= expected_cells # Verify that each expected cell polygon maps to a cell tile by centroid location - produced_cell_tiles = { - tuple(map(int, p.stem.split("_")[-2:])) for p in cell_tile_files - } + produced_cell_tiles = {tuple(map(int, p.stem.split("_")[-2:])) for p in cell_tile_files} for poly in polygons: - if not ( - BBOX[0] <= poly.centroid.x < BBOX[1] - and BBOX[2] <= poly.centroid.y < BBOX[3] - ): + if not (BBOX[0] <= poly.centroid.x < BBOX[1] and BBOX[2] <= poly.centroid.y < BBOX[3]): continue i = int((poly.centroid.x - bounds["x_min"]) // TILE_SIZE) j = int((poly.centroid.y - bounds["y_min"]) // TILE_SIZE) - assert (i, j) in produced_cell_tiles \ No newline at end of file + assert (i, j) in produced_cell_tiles diff --git a/tests/unit/test_viz/test_landscape_colors.py b/tests/unit/test_viz/test_landscape_colors.py index 5dc3ff96..988ff56e 100644 --- a/tests/unit/test_viz/test_landscape_colors.py +++ b/tests/unit/test_viz/test_landscape_colors.py @@ -1,31 +1,32 @@ -import numpy as np -import pandas as pd -import pytest import io import json from unittest.mock import patch +import numpy as np +import pandas as pd +import pytest + + try: import anndata as ad + from celldega.viz import Landscape except Exception as e: # pragma: no cover - if deps missing skip pytest.skip(f"celldega modules unavailable: {e}", allow_module_level=True) + def mock_urlopen_success(*args, **kwargs): fake_json = json.dumps({"technology": "Xenium"}).encode("utf-8") return io.BytesIO(fake_json) + @patch("celldega.viz.widget.urllib.request.urlopen", side_effect=mock_urlopen_success) def test_leiden_colors_added_if_missing(mock_urlopen) -> None: adata = ad.AnnData(np.zeros((3, 3))) adata.obs["leiden"] = pd.Categorical(["0", "1", "0"]) adata.uns.pop("leiden_colors", None) - adata.obsm["X_umap"] = np.array([ - [0.0, 0.0], - [1.0, 1.0], - [2.0, 2.0] - ]) + adata.obsm["X_umap"] = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]]) widget = Landscape(adata=adata) diff --git a/tests/unit/test_viz/test_widget.py b/tests/unit/test_viz/test_widget.py index cc8bd31d..b22daff0 100644 --- a/tests/unit/test_viz/test_widget.py +++ b/tests/unit/test_viz/test_widget.py @@ -2,10 +2,13 @@ import io import json +from unittest.mock import patch +import warnings + import numpy as np import pandas as pd import pytest -from unittest.mock import patch + try: import geopandas as gpd @@ -17,6 +20,16 @@ pytest.skip(f"celldega modules unavailable: {e}", allow_module_level=True) +def test_landscape_deprecated_technology_argument_warning(): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _ = Landscape(technology="MERSCOPE") + messages = [str(warn.message) for warn in w] + assert any("deprecated" in msg.lower() for msg in messages), ( + "Expected deprecation warning for `technology` argument" + ) + + def make_simple_matrix() -> Matrix: np.random.seed(0) df = pd.DataFrame(np.random.rand(4, 5)) @@ -40,7 +53,7 @@ def test_export_viz_parquet_returns_bytes() -> None: assert set(pq) == expected_keys for key in expected_keys - {"meta"}: - assert isinstance(pq[key], (bytes, bytearray)) + assert isinstance(pq[key], bytes | bytearray) assert pq[key] # ensure non-empty assert isinstance(pq["meta"], dict) @@ -50,11 +63,8 @@ def test_clustergram_initializes_with_parquet() -> None: pq = mat.export_viz_parquet() widget = Clustergram(matrix=mat) - - # Confirm meta is set correctly assert widget.network_meta == pq["meta"] - # Confirm dynamic parquet attributes exist and match expected values for attr, key in [ ("mat_parquet", "mat"), ("row_nodes_parquet", "row_nodes"), @@ -63,15 +73,12 @@ def test_clustergram_initializes_with_parquet() -> None: ("col_linkage_parquet", "col_linkage"), ]: assert hasattr(widget, attr), f"Missing attribute: {attr}" - assert getattr(widget, attr) == pq[key], ( - f"Attribute {attr} does not match expected parquet value" - ) + assert getattr(widget, attr) == pq[key] def test_clustergram_selected_genes_trait() -> None: mat = make_simple_matrix() widget = Clustergram(matrix=mat) - assert widget.selected_genes == [] assert widget.top_n_genes == 50 @@ -79,15 +86,26 @@ def test_clustergram_selected_genes_trait() -> None: assert widget.selected_genes == ["A", "B"] -# ---------- Landscape Patch and Test ---------- +# ---------- Landscape Patch and Tests ---------- + + +class MockHTTPResponse(io.BytesIO): + def __init__(self, data: bytes): + super().__init__(data) + self.headers = {} # Mimic real HTTPResponse -def mock_urlopen_success(*args, **kwargs): - """Mock function to simulate reading landscape_parameters.json.""" - fake_json = json.dumps({"technology": "Xenium"}).encode("utf-8") - return io.BytesIO(fake_json) +def mock_urlopen_with_technology(*args, **kwargs): + """Valid JSON containing technology.""" + return MockHTTPResponse(json.dumps({"technology": "Xenium"}).encode("utf-8")) -@patch("celldega.viz.widget.urllib.request.urlopen", side_effect=mock_urlopen_success) + +def mock_urlopen_missing_technology(*args, **kwargs): + """JSON missing the technology field.""" + return MockHTTPResponse(json.dumps({}).encode("utf-8")) + + +@patch("celldega.viz.widget.urllib.request.urlopen", side_effect=mock_urlopen_with_technology) def test_landscape_nbhd_geojson_and_metadata(mock_urlopen) -> None: gdf = gpd.GeoDataFrame( {"name": ["a"], "cat": ["x"]}, @@ -96,10 +114,8 @@ def test_landscape_nbhd_geojson_and_metadata(mock_urlopen) -> None: meta_nbhd = pd.DataFrame({"area": [1]}, index=["a"]) widget = Landscape(nbhd=gdf, meta_nbhd=meta_nbhd) - - # Drop geometry_pixel for comparison gdf = gdf.drop(columns=["geometry_pixel"], errors="ignore") assert widget.nbhd_geojson == json.loads(gdf.to_json()) assert hasattr(widget, "meta_nbhd_parquet") - assert isinstance(widget.meta_nbhd_parquet, (bytes, bytearray)) + assert isinstance(widget.meta_nbhd_parquet, bytes | bytearray) \ No newline at end of file From 92213f1ce7e00dab12326acb30967e2722149142 Mon Sep 17 00:00:00 2001 From: Jaspreet Ishar Date: Thu, 21 Aug 2025 13:45:28 -0400 Subject: [PATCH 16/16] fixing chromium pytest by including placeholder img_width and img_height in save_landscape_parameters --- src/celldega/pre/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/celldega/pre/__init__.py b/src/celldega/pre/__init__.py index 87289fc3..dd8323ee 100644 --- a/src/celldega/pre/__init__.py +++ b/src/celldega/pre/__init__.py @@ -895,6 +895,8 @@ def make_chromium_from_anndata(adata, path_landscape_files): save_landscape_parameters( technology="Chromium", path_landscape_files=path_landscape_files, + image_width=100, + image_height=100, image_name="", tile_size=1, image_info=[],