diff --git a/docs/analysis-flow-scripts/register_example_file.py b/docs/analysis-flow-scripts/register_example_file.py index ddf3aac..e696364 100644 --- a/docs/analysis-flow-scripts/register_example_file.py +++ b/docs/analysis-flow-scripts/register_example_file.py @@ -2,7 +2,7 @@ import bionty as bt -ln.context.uid = "QkQJXJlOTsyM0000" +ln.context.uid = "K4wsS5DTYdFp0001" ln.context.track() # an example dataset that has a few cell type, tissue and disease annotations diff --git a/docs/analysis-flow.ipynb b/docs/analysis-flow.ipynb deleted file mode 100644 index b4c602f..0000000 --- a/docs/analysis-flow.ipynb +++ /dev/null @@ -1,380 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[![Jupyter Notebook](https://img.shields.io/badge/Source%20on%20GitHub-orange)](https://github.com/laminlabs/lamin-usecases/blob/main/docs/analysis-flow.ipynb)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Analysis flow" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we'll track typical data transformations like subsetting that occur during analysis.\n", - "\n", - "If exploring more generally, read this first: {doc}`/project-flow`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide-output" - ] - }, - "outputs": [], - "source": [ - "# !pip install 'lamindb[jupyter,bionty]'\n", - "!lamin init --storage ./analysis-usecase --schema bionty" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import lamindb as ln\n", - "import bionty as bt\n", - "from lamin_utils import logger" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Register an initial dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here we register an initial artifact with a pipeline script [register_example_file.py](https://github.com/laminlabs/lamin-usecases/blob/main/docs/analysis-flow-scripts/register_example_file.py)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide-output" - ] - }, - "outputs": [], - "source": [ - "!python analysis-flow-scripts/register_example_file.py" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Pull the registered dataset, apply a transformation, and register the result" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Track the current notebook:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ln.context.uid = \"eNef4Arw8nNM0000\"\n", - "ln.context.track()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "artifact = ln.Artifact.get(description=\"anndata with obs\")\n", - "artifact.describe()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get a backed AnnData object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata = artifact.open()\n", - "adata" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Subset dataset to specific cell types and diseases" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cell_types = artifact.cell_types.all().lookup(return_field=\"name\")\n", - "diseases = artifact.diseases.all().lookup(return_field=\"name\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create the subset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "subset_obs = adata.obs.cell_type.isin(\n", - " [cell_types.t_cell, cell_types.hematopoietic_stem_cell]\n", - ") & (adata.obs.disease.isin([diseases.liver_lymphoma, diseases.chronic_kidney_disease]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_subset = adata[subset_obs]\n", - "adata_subset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adata_subset.obs[[\"cell_type\", \"disease\"]].value_counts()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Register the subsetted AnnData:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide-output" - ] - }, - "outputs": [], - "source": [ - "curate = ln.Curate.from_anndata(\n", - " adata_subset.to_memory(), \n", - " var_index=bt.Gene.ensembl_gene_id, \n", - " categoricals={\n", - " \"cell_type\": bt.CellType.name, \n", - " \"disease\": bt.Disease.name, \n", - " \"tissue\": bt.Tissue.name,\n", - " },\n", - " organism=\"human\"\n", - ")\n", - "\n", - "curate.validate()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide-output" - ] - }, - "outputs": [], - "source": [ - "artifact = curate.save_artifact(description=\"anndata with obs subset\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "artifact.describe()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Examine data flow" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Query a subsetted `.h5ad` artifact containing \"hematopoietic stem cell\" and \"T cell\":" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cell_types = bt.CellType.lookup()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "my_subset = ln.Artifact.filter(\n", - " suffix=\".h5ad\",\n", - " description__endswith=\"subset\",\n", - " cell_types__in=[\n", - " cell_types.hematopoietic_stem_cell,\n", - " cell_types.t_cell,\n", - " ],\n", - ").first()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "my_subset" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Common questions that might arise are:\n", - "\n", - "- What is the history of this artifact?\n", - "- Which features and labels are associated with it?\n", - "- Which notebook analyzed and registered this artifact?\n", - "- By whom?\n", - "- And which artifact is its parent?\n", - "\n", - "Let's answer this using LaminDB:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"--> What is the history of this artifact?\\n\")\n", - "artifact.view_lineage()\n", - "\n", - "print(\"\\n\\n--> Which features and labels are associated with it?\\n\")\n", - "logger.print(artifact.features)\n", - "logger.print(artifact.labels)\n", - "\n", - "print(\"\\n\\n--> Which notebook analyzed and registered this artifact\\n\")\n", - "logger.print(artifact.transform)\n", - "\n", - "print(\"\\n\\n--> By whom\\n\")\n", - "logger.print(artifact.created_by)\n", - "\n", - "print(\"\\n\\n--> And which artifact is its parent\\n\")\n", - "display(artifact.run.input_artifacts.df())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide-cell" - ] - }, - "outputs": [], - "source": [ - "!lamin delete --force analysis-usecase\n", - "!rm -r ./analysis-usecase" - ] - } - ], - "metadata": { - "citation-manager": { - "items": {} - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - }, - "nbproject": { - "id": "eNef4Arw8nNM", - "parent": null, - "pypackage": null, - "time_init": "2023-06-05T13:33:30.647656+00:00", - "user_handle": "sunnyosun", - "user_id": "kmvZDIX9", - "user_name": "Sunny Sun", - "version": "0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}