From c0ef464b02faa5ea56542185fa00e89e88b4127a Mon Sep 17 00:00:00 2001 From: Sergei Rybakov Date: Thu, 3 Oct 2024 11:49:09 +0200 Subject: [PATCH 01/14] =?UTF-8?q?=F0=9F=90=9B=20Fix=20walrus=20and=20dupli?= =?UTF-8?q?cated=20updated=5Fat=20(#2020)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_query_set.py | 2 +- lamindb/core/_context.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lamindb/_query_set.py b/lamindb/_query_set.py index 668667fb2..259896f8f 100644 --- a/lamindb/_query_set.py +++ b/lamindb/_query_set.py @@ -168,7 +168,7 @@ def df( for field in self.model._meta.fields if isinstance(field, models.ForeignKey) ] - for field_name in ["run_id", "updated_at", "created_by_id", "updated_at"]: + for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]: if field_name in field_names: field_names.remove(field_name) field_names.append(field_name) diff --git a/lamindb/core/_context.py b/lamindb/core/_context.py index 59fd1818f..0aec2390d 100644 --- a/lamindb/core/_context.py +++ b/lamindb/core/_context.py @@ -577,7 +577,7 @@ def get_shortcut() -> str: import nbproject # it might be that the user modifies the title just before ln.finish() - if nbproject_title := nbproject.meta.live.title != self.transform.name: + if (nbproject_title := nbproject.meta.live.title) != self.transform.name: self.transform.name = nbproject_title self.transform.save() if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING: From c4a7a1eb5b14ce0b9cbc4c5d31c324b9b28ec2f7 Mon Sep 17 00:00:00 2001 From: Sergei Rybakov Date: Fri, 4 Oct 2024 11:16:31 +0200 Subject: [PATCH 02/14] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Migrate=20to=20edge?= =?UTF-8?q?=20function=20in=20`connect()`=20(#2017)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_record.py | 4 ++-- sub/lamindb-setup | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lamindb/_record.py b/lamindb/_record.py index 69116e268..2feb3842d 100644 --- a/lamindb/_record.py +++ b/lamindb/_record.py @@ -15,7 +15,7 @@ update_db_using_local, ) from lamindb_setup.core._docs import doc_args -from lamindb_setup.core._hub_core import connect_instance +from lamindb_setup.core._hub_core import connect_instance_hub from lamindb_setup.core._settings_store import instance_settings_file from lnschema_core.models import IsVersioned, Record, Run, Transform @@ -382,7 +382,7 @@ def using( settings_file = instance_settings_file(name, owner) cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt" if not settings_file.exists(): - result = connect_instance(owner=owner, name=name) + result = connect_instance_hub(owner=owner, name=name) if isinstance(result, str): raise RuntimeError( f"Failed to load instance {instance}, please check your permissions!" diff --git a/sub/lamindb-setup b/sub/lamindb-setup index 9df148619..edb6fef06 160000 --- a/sub/lamindb-setup +++ b/sub/lamindb-setup @@ -1 +1 @@ -Subproject commit 9df148619c96620b71a9fb6b73f577d2073eff26 +Subproject commit edb6fef0636ef9fb5877f416c82522bef98f8bdd From 2c58b44449e8a9a6862b82d09f767650e5bb3751 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Mon, 7 Oct 2024 14:19:58 +0200 Subject: [PATCH 03/14] =?UTF-8?q?=F0=9F=8F=97=EF=B8=8F=20Make=20`lamindb-s?= =?UTF-8?q?etup`=20a=20dependency=20of=20`lamin-cli`=20(#2021)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 3 ++- sub/lamin-cli | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e0eec43cc..a0d0c55f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,9 +16,10 @@ classifiers = [ dependencies = [ # Lamin PINNED packages "lnschema_core==0.74.6", - "lamindb_setup==0.77.7", "lamin_utils==0.13.6", "lamin_cli==0.17.8", + # PINNED in lamin-cli + "lamindb_setup", # others "rapidfuzz", "pyarrow", diff --git a/sub/lamin-cli b/sub/lamin-cli index 8ed07bafd..a0fad05c5 160000 --- a/sub/lamin-cli +++ b/sub/lamin-cli @@ -1 +1 @@ -Subproject commit 8ed07bafd8e955b1a36b1b606c56ed4b5fa0a1e3 +Subproject commit a0fad05c5492067429f7a939d36649ddad433a2a From 1dee74557db3be45dd8e368a28f5a48e60b9f4e2 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Mon, 7 Oct 2024 19:03:44 +0200 Subject: [PATCH 04/14] =?UTF-8?q?=F0=9F=9A=B8=20Query=20with=20typed=20lab?= =?UTF-8?q?els=20through=20`.features`=20(#2023)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/core/_feature_manager.py | 21 ++++++++++++--------- tests/core/test_feature_manager.py | 3 +++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/lamindb/core/_feature_manager.py b/lamindb/core/_feature_manager.py index 2caca54a4..9c566f3ea 100644 --- a/lamindb/core/_feature_manager.py +++ b/lamindb/core/_feature_manager.py @@ -509,18 +509,21 @@ def filter_base(cls, **expression): expression = {feature_param: feature, f"value{comparator}": value} feature_value = value_model.filter(**expression) new_expression[f"_{feature_param}_values__in"] = feature_value - else: + elif isinstance(value, (str, Record)): if isinstance(value, str): - expression = {f"name{comparator}": value} - label = ULabel.get(**expression) - new_expression["ulabels"] = label - else: - raise NotImplementedError + expression = {"name": value} + value = ULabel.get(**expression) + accessor_name = ( + value.__class__.artifacts.through.artifact.field._related_name + ) + new_expression[f"{accessor_name}__feature"] = feature + new_expression[f"{accessor_name}__{value.__class__.__name__.lower()}"] = ( + value + ) + else: + raise NotImplementedError if cls == FeatureManager or cls == ParamManagerArtifact: return Artifact.filter(**new_expression) - # might renable something similar in the future - # elif cls == FeatureManagerCollection: - # return Collection.filter(**new_expression) elif cls == ParamManagerRun: return Run.filter(**new_expression) diff --git a/tests/core/test_feature_manager.py b/tests/core/test_feature_manager.py index acb2d6dbf..91baf7bc0 100644 --- a/tests/core/test_feature_manager.py +++ b/tests/core/test_feature_manager.py @@ -203,6 +203,9 @@ def test_features_add(adata): ln.Artifact.features.filter( temperature=100.0, project="project_1", donor="U0123" ).one() + # for bionty + assert artifact == ln.Artifact.features.filter(disease=diseases[0]).one() + # test comparator assert ln.Artifact.features.filter(temperature__lt=21).one_or_none() is None assert len(ln.Artifact.features.filter(temperature__gt=21).all()) >= 1 From ba1d89f7aa5eb4b9cbad21544e17ed7c142fe380 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Mon, 7 Oct 2024 21:38:59 +0200 Subject: [PATCH 05/14] =?UTF-8?q?=F0=9F=93=9D=20Document=20how=20to=20quer?= =?UTF-8?q?y=20by=20nested=20run=20parameters=20(#2024)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/scripts/run-track-with-params.py | 7 ++-- docs/track.ipynb | 49 ++++++++++++++++++++++++--- sub/lnschema-core | 2 +- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/docs/scripts/run-track-with-params.py b/docs/scripts/run-track-with-params.py index 2ec473d77..74e6ac8dd 100644 --- a/docs/scripts/run-track-with-params.py +++ b/docs/scripts/run-track-with-params.py @@ -10,8 +10,11 @@ params = { "input_dir": args.input_dir, "learning_rate": args.learning_rate, - "downsample": args.downsample, + "preprocess_params": { + "downsample": args.downsample, + "normalization": "the_good_one", + }, } - ln.track("JjRF4mACd9m00000", params=params) + ln.track("JjRF4mACd9m00001", params=params) # your code ln.finish() diff --git a/docs/track.ipynb b/docs/track.ipynb index a18868df3..267dd0922 100644 --- a/docs/track.ipynb +++ b/docs/track.ipynb @@ -141,6 +141,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "(track-run-parameters)=\n", + "\n", "## Track run parameters" ] }, @@ -165,7 +167,7 @@ "\n", "ln.Param(name=\"input_dir\", dtype=\"str\").save()\n", "ln.Param(name=\"learning_rate\", dtype=\"float\").save()\n", - "ln.Param(name=\"downsample\", dtype=\"bool\").save()" + "ln.Param(name=\"preprocess_params\", dtype=\"dict\").save()" ] }, { @@ -223,14 +225,24 @@ }, "outputs": [], "source": [ - "ln.Run.params.filter(learning_rate=0.01, input_dir=\"./mydataset\").df()" + "ln.Run.params.filter(learning_rate=0.01, input_dir=\"./mydataset\", preprocess_params__downsample=True).df()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Look at the parameter values that were used for a given run." + "Note that:\n", + "\n", + "* `preprocess_params__downsample=True` traverses the dictionary `preprocess_params` to find the key `\"downsample\"` and match it to `True`\n", + "* nested keys like `\"downsample\"` in a dictionary do not appear in `Param` and hence, do not get validated" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below is how you get the parameter values that were used for a given run." ] }, { @@ -247,6 +259,35 @@ "run.params.get_values()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or [on the hub](https://lamin.ai/laminlabs/lamindata/transform/JjRF4mACd9m00001).\n", + "\n", + "\"image\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to query all parameter values across all runs, use {class}`~lamindb.core.ParamValue`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "ln.core.ParamValue.df(include=[\"param__name\", \"created_by__handle\"])" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -297,7 +338,7 @@ }, "outputs": [], "source": [ - "assert run.params.get_values() == {'downsample': True, 'input_dir': './mydataset', 'learning_rate': 0.01}\n", + "assert run.params.get_values() == {'input_dir': './mydataset', 'learning_rate': 0.01, 'preprocess_params': {'downsample': True, 'normalization': 'the_good_one'}}\n", "\n", "# clean up test instance\n", "!rm -r ./test-track\n", diff --git a/sub/lnschema-core b/sub/lnschema-core index 9a2687a59..7d14d949f 160000 --- a/sub/lnschema-core +++ b/sub/lnschema-core @@ -1 +1 @@ -Subproject commit 9a2687a591cbb99c25c9eace8ae944ec45cd4a3b +Subproject commit 7d14d949f6b295c900bd4a161f4e0794356b7ae4 From b47445af8278b43686fff9eaa211f73bd915247f Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Tue, 8 Oct 2024 11:02:08 +0200 Subject: [PATCH 06/14] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Prepare=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- sub/lamin-cli | 2 +- sub/lamindb-setup | 2 +- sub/lnschema-core | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a0d0c55f9..58a371806 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,9 +15,9 @@ classifiers = [ ] dependencies = [ # Lamin PINNED packages - "lnschema_core==0.74.6", + "lnschema_core==0.75.0", "lamin_utils==0.13.6", - "lamin_cli==0.17.8", + "lamin_cli==0.18.0", # PINNED in lamin-cli "lamindb_setup", # others diff --git a/sub/lamin-cli b/sub/lamin-cli index a0fad05c5..7b18516a2 160000 --- a/sub/lamin-cli +++ b/sub/lamin-cli @@ -1 +1 @@ -Subproject commit a0fad05c5492067429f7a939d36649ddad433a2a +Subproject commit 7b18516a242c298b5817dfff6a50c36eea51a71b diff --git a/sub/lamindb-setup b/sub/lamindb-setup index edb6fef06..e4d6e9edb 160000 --- a/sub/lamindb-setup +++ b/sub/lamindb-setup @@ -1 +1 @@ -Subproject commit edb6fef0636ef9fb5877f416c82522bef98f8bdd +Subproject commit e4d6e9edbc0fa07113d6fd37a71d827579f97545 diff --git a/sub/lnschema-core b/sub/lnschema-core index 7d14d949f..9cff76a3d 160000 --- a/sub/lnschema-core +++ b/sub/lnschema-core @@ -1 +1 @@ -Subproject commit 7d14d949f6b295c900bd4a161f4e0794356b7ae4 +Subproject commit 9cff76a3d0d4dbe4149949ca9955618064262a9c From 5aeb0f99a2f288a850dd11da5c18c6bc52907538 Mon Sep 17 00:00:00 2001 From: Mark Keller <7525285+keller-mark@users.noreply.github.com> Date: Tue, 8 Oct 2024 05:28:09 -0400 Subject: [PATCH 07/14] =?UTF-8?q?=E2=9C=A8=20Overhaul=20`save=5Fvitessce?= =?UTF-8?q?=5Fconfig()`=20to=20support=20multiple=20artifacts=20and=20non-?= =?UTF-8?q?`.zarr`=20(#1953)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Alex Wolf --- docs/storage/vitessce.ipynb | 42 ++++----------- lamindb/integrations/_vitessce.py | 86 ++++++++++++++----------------- 2 files changed, 49 insertions(+), 79 deletions(-) diff --git a/docs/storage/vitessce.ipynb b/docs/storage/vitessce.ipynb index b7215f889..1f8121cf2 100644 --- a/docs/storage/vitessce.ipynb +++ b/docs/storage/vitessce.ipynb @@ -82,7 +82,7 @@ "vc = VitessceConfig(schema_version=\"1.0.15\")\n", "vc.add_dataset(name=\"test1\").add_object(\n", " AnnDataWrapper(\n", - " adata_url=dataset_artifact.path.to_url(),\n", + " adata_artifact=dataset_artifact,\n", " obs_embedding_paths=[\"obsm/X_umap\"],\n", " ),\n", ")\n", @@ -134,39 +134,17 @@ "metadata": {}, "outputs": [], "source": [ - "# use a corrupted suffix\n", + "# pass an artifact URL instead of the artifact object itself\n", "vc = VitessceConfig(schema_version=\"1.0.15\")\n", - "vc.add_dataset(name=\"test1\").add_object(\n", - " AnnDataWrapper(\n", - " adata_url=dataset_artifact.path.to_url() + \".random\",\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " ),\n", - ")\n", - "with pytest.raises(ValueError) as error:\n", - " ln.integrations.save_vitessce_config(vc)\n", - "print(error.exconly())\n", - "assert error.exconly().startswith(\"ValueError: Suffix should be '.zarr' or one of\")\n", - "assert error.exconly().endswith(\".random\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# use a corrupted stem_uid\n", - "vc = VitessceConfig(schema_version=\"1.0.15\")\n", - "vc.add_dataset(name=\"test1\").add_object(\n", - " AnnDataWrapper(\n", - " adata_url=dataset_artifact.path.to_url().replace(dataset_artifact.stem_uid, \"not-a-valid-uid\"),\n", - " obs_embedding_paths=[\"obsm/X_umap\"],\n", - " ),\n", - ")\n", - "with pytest.raises(ValueError) as error:\n", - " ln.integrations.save_vitessce_config(vc)\n", + "with pytest.raises(AttributeError) as error:\n", + " vc.add_dataset(name=\"test1\").add_object(\n", + " AnnDataWrapper(\n", + " adata_artifact=dataset_artifact.path.to_url(),\n", + " obs_embedding_paths=[\"obsm/X_umap\"],\n", + " ),\n", + " )\n", "print(error.exconly())\n", - "assert error.exconly().startswith(\"ValueError: Could not find dataset with stem uid 'not-a-valid-uid' in lamindb\")" + "assert error.exconly().startswith(\"AttributeError: 'str' object has no attribute 'path'\")\n" ] }, { diff --git a/lamindb/integrations/_vitessce.py b/lamindb/integrations/_vitessce.py index 51ed457d1..bb74960bd 100644 --- a/lamindb/integrations/_vitessce.py +++ b/lamindb/integrations/_vitessce.py @@ -8,6 +8,7 @@ from lamin_utils import logger from lamindb._artifact import Artifact +from lamindb._collection import Collection from lamindb._run import Run from lamindb._transform import Transform @@ -20,14 +21,21 @@ def save_vitessce_config( vitessce_config: VitessceConfig, description: str | None = None ) -> Artifact: - """Validates and saves a ``VitessceConfig`` object. + """Validates and saves a `VitessceConfig` object. + + If the `VitessceConfig` object references multiple artifacts, automatically + creates a `Collection` and displays the "Vitessce button" next to it. Guide: :doc:`docs:vitessce`. Args: - vitessce_config (``VitessceConfig``): A `VitessceConfig` object. - description: A description for the `VitessceConfig` artifact. + vitessce_config: A `VitessceConfig` object. + description: A description for the `VitessceConfig` object. Is used as + `name` for a `Collection` in case the `VitessceConfig` object + references multiple artifacts. + .. versionchanged:: 0.76.12 + Now assumes `vitessce-python >= 3.4.0`, which allows passing artifacts within `VitessceConfig`. .. versionchanged:: 0.75.1 Now displays the "Vitessce button" on the hub next to the dataset. It additionally keeps displaying it next to the configuration file. .. versionchanged:: 0.70.2 @@ -40,53 +48,34 @@ def save_vitessce_config( assert isinstance(vitessce_config, VitessceConfig) # noqa: S101 vc_dict = vitessce_config.to_dict() - valid_composite_zarr_suffixes = [ - suffix for suffix in VALID_SUFFIXES.COMPOSITE if suffix.endswith(".zarr") - ] - # validate - dataset_artifacts = [] - assert vc_dict["datasets"] # noqa: S101 - for vitessce_dataset in vc_dict["datasets"]: - # didn't find good ways to violate the below, hence using plain asserts - # without user feedback - assert "files" in vitessce_dataset # noqa: S101 - assert vitessce_dataset["files"] # noqa: S101 - for file in vitessce_dataset["files"]: - if "url" not in file: - raise ValueError("Each file must have a 'url' key.") - s3_path = file["url"] - s3_path_last_element = s3_path.split("/")[-1] - # now start with attempting to strip the composite suffix candidates - for suffix in valid_composite_zarr_suffixes: - s3_path_last_element = s3_path_last_element.replace(suffix, "") - # in case there was no hit, strip plain ".zarr" - artifact_stem_uid = s3_path_last_element.replace(".zarr", "") - # if there is still a "." in string, raise an error - if "." in artifact_stem_uid: - raise ValueError( - f"Suffix should be '.zarr' or one of {valid_composite_zarr_suffixes}. Inspect your path {s3_path}" - ) - artifact = Artifact.filter(uid__startswith=artifact_stem_uid).one_or_none() - if artifact is None: - raise ValueError( - f"Could not find dataset with stem uid '{artifact_stem_uid}' in lamindb: {vitessce_dataset}. Did you follow https://docs.lamin.ai/vitessce? It appears the AWS S3 path doesn't encode a lamindb uid." - ) - else: - dataset_artifacts.append(artifact) + try: + url_to_artifact_dict = vitessce_config.get_artifacts() + except AttributeError as e: + logger.error( + "Artifact registration requires vitessce package version 3.4.0 or higher." + ) + raise e + dataset_artifacts = list(url_to_artifact_dict.values()) + if len(dataset_artifacts) == 0: + logger.warning( + "No artifacts were registered in this config. If intending to visualize data from artifacts, use _artifact parameters of Vitessce wrapper class constructors to facilitate registration." + ) + # the below will be replaced with a `ln.tracked()` decorator soon - with logger.mute(): - transform = Transform( - uid="kup03MJBsIVa0001", - name="save_vitessce_config", - type="function", - version="2", - ).save() + transform = Transform( + uid="kup03MJBsIVa0002", + name="save_vitessce_config", + type="function", + version="3", + ).save() run = Run(transform=transform).save() + run.input_artifacts.set(dataset_artifacts) + collection = None if len(dataset_artifacts) > 1: # if we have more datasets, we should create a collection # and attach an action to the collection - raise NotImplementedError - run.input_artifacts.set(dataset_artifacts) + collection = Collection(dataset_artifacts, name=description).save() + # create a JSON export config_file_local_path = ln_setup.settings.cache_dir / "config.vitessce.json" with open(config_file_local_path, "w") as file: @@ -94,8 +83,11 @@ def save_vitessce_config( vitessce_config_artifact = Artifact( config_file_local_path, description=description, run=run ).save() - # we have one and only one dataset artifact, hence the following line is OK - dataset_artifacts[0]._actions.add(vitessce_config_artifact) + if collection is None: + # we have one and only one dataset artifact, hence the following line is OK + dataset_artifacts[0]._actions.add(vitessce_config_artifact) + else: + collection._actions.add(vitessce_config_artifact) slug = ln_setup.settings.instance.slug logger.important( f"go to: https://lamin.ai/{slug}/artifact/{vitessce_config_artifact.uid}" From 85c5e3c6c062ae25f78e669d0311fbd793b396ce Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Tue, 8 Oct 2024 12:54:44 +0200 Subject: [PATCH 08/14] =?UTF-8?q?=E2=9C=85=20Add=20more=20tests=20for=20qu?= =?UTF-8?q?erying=20via=20`.features`=20(#2028)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/core/_feature_manager.py | 42 +++++++++++++++++++++++++----- lamindb/core/exceptions.py | 2 +- tests/core/test_feature_manager.py | 12 ++++++++- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/lamindb/core/_feature_manager.py b/lamindb/core/_feature_manager.py index 9c566f3ea..b51d1c12a 100644 --- a/lamindb/core/_feature_manager.py +++ b/lamindb/core/_feature_manager.py @@ -40,7 +40,7 @@ transfer_to_default_db, ) from lamindb._save import save -from lamindb.core.exceptions import ValidationError +from lamindb.core.exceptions import DoesNotExist, ValidationError from lamindb.core.storage import LocalPathClasses from ._django import get_artifact_with_related @@ -510,17 +510,45 @@ def filter_base(cls, **expression): feature_value = value_model.filter(**expression) new_expression[f"_{feature_param}_values__in"] = feature_value elif isinstance(value, (str, Record)): + # because SQL is sensitive to whether querying with __in or not + # and might return multiple equivalent records for the latter + # we distinguish cases in which we have multiple label matches vs. one + label = None + labels = None if isinstance(value, str): - expression = {"name": value} - value = ULabel.get(**expression) + # we need the comparator here because users might query like so + # ln.Artifact.features.filter(experiment__contains="Experi") + expression = {f"name{comparator}": value} + labels = ULabel.filter(**expression).all() + if len(labels) == 0: + raise DoesNotExist( + f"Did not find a ULabel matching `name{comparator}={value}`" + ) + elif len(labels) == 1: + label = labels[0] + elif isinstance(value, Record): + label = value + label_registry = ( + label.__class__ if label is not None else labels[0].__class__ + ) accessor_name = ( - value.__class__.artifacts.through.artifact.field._related_name + label_registry.artifacts.through.artifact.field._related_name ) new_expression[f"{accessor_name}__feature"] = feature - new_expression[f"{accessor_name}__{value.__class__.__name__.lower()}"] = ( - value - ) + if label is not None: + # simplified query if we have exactly one label + new_expression[ + f"{accessor_name}__{label_registry.__name__.lower()}" + ] = label + else: + new_expression[ + f"{accessor_name}__{label_registry.__name__.lower()}__in" + ] = labels else: + # if passing a list of records, we want to + # find artifacts that are annotated by all of them at the same + # time; hence, we don't want the __in construct that we use to match strings + # https://laminlabs.slack.com/archives/C04FPE8V01W/p1688328084810609 raise NotImplementedError if cls == FeatureManager or cls == ParamManagerArtifact: return Artifact.filter(**new_expression) diff --git a/lamindb/core/exceptions.py b/lamindb/core/exceptions.py index 66a5e3676..96c8903f7 100644 --- a/lamindb/core/exceptions.py +++ b/lamindb/core/exceptions.py @@ -45,7 +45,7 @@ class ValidationError(SystemExit): # inspired by Django's DoesNotExist # equivalent to SQLAlchemy's NoResultFound -class DoesNotExist(Exception): +class DoesNotExist(SystemExit): """No record found.""" pass diff --git a/tests/core/test_feature_manager.py b/tests/core/test_feature_manager.py index 91baf7bc0..ea9936a64 100644 --- a/tests/core/test_feature_manager.py +++ b/tests/core/test_feature_manager.py @@ -3,7 +3,7 @@ import bionty as bt import lamindb as ln import pytest -from lamindb.core.exceptions import ValidationError +from lamindb.core.exceptions import DoesNotExist, ValidationError @pytest.fixture(scope="module") @@ -206,7 +206,17 @@ def test_features_add(adata): # for bionty assert artifact == ln.Artifact.features.filter(disease=diseases[0]).one() + # test not finding the ULabel + with pytest.raises(DoesNotExist) as error: + ln.Artifact.features.get(project="project__1") + assert error.exconly().startswith( + "lamindb.core.exceptions.DoesNotExist: Did not find a ULabel matching" + ) + # test comparator + assert artifact == ln.Artifact.features.filter(experiment__contains="ment 1").one() + # due to the __in comparator, we get the same artifact twice below + assert len(ln.Artifact.features.filter(experiment__contains="Experi").all()) == 2 assert ln.Artifact.features.filter(temperature__lt=21).one_or_none() is None assert len(ln.Artifact.features.filter(temperature__gt=21).all()) >= 1 From d29b10fffbddeddd8ee613868222f0946d3ca570 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Tue, 8 Oct 2024 13:35:56 +0200 Subject: [PATCH 09/14] =?UTF-8?q?=F0=9F=94=8A=20More=20logging=20in=20`sav?= =?UTF-8?q?e=5Fvitessce=5Fconfig()`=20(#2029)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/integrations/_vitessce.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/lamindb/integrations/_vitessce.py b/lamindb/integrations/_vitessce.py index bb74960bd..13e78fa76 100644 --- a/lamindb/integrations/_vitessce.py +++ b/lamindb/integrations/_vitessce.py @@ -51,15 +51,13 @@ def save_vitessce_config( try: url_to_artifact_dict = vitessce_config.get_artifacts() except AttributeError as e: - logger.error( - "Artifact registration requires vitessce package version 3.4.0 or higher." - ) - raise e + raise SystemExit( + "save_vitessce_config() requires vitessce>=3.4.0: pip install vitessce>=3.4.0" + ) from e dataset_artifacts = list(url_to_artifact_dict.values()) - if len(dataset_artifacts) == 0: - logger.warning( - "No artifacts were registered in this config. If intending to visualize data from artifacts, use _artifact parameters of Vitessce wrapper class constructors to facilitate registration." - ) + message = "\n".join([artifact.__repr__() for artifact in dataset_artifacts]) + logger.important(f"VitessceConfig references these artifacts:\n{message}") + assert len(dataset_artifacts) > 0 # noqa: S101 # the below will be replaced with a `ln.tracked()` decorator soon transform = Transform( @@ -83,15 +81,21 @@ def save_vitessce_config( vitessce_config_artifact = Artifact( config_file_local_path, description=description, run=run ).save() + slug = ln_setup.settings.instance.slug + logger.important( + f"VitessceConfig: https://lamin.ai/{slug}/artifact/{vitessce_config_artifact.uid}" + ) if collection is None: # we have one and only one dataset artifact, hence the following line is OK dataset_artifacts[0]._actions.add(vitessce_config_artifact) + logger.important( + f"Dataset: https://lamin.ai/{slug}/artifact/{dataset_artifacts[0].uid}" + ) else: collection._actions.add(vitessce_config_artifact) - slug = ln_setup.settings.instance.slug - logger.important( - f"go to: https://lamin.ai/{slug}/artifact/{vitessce_config_artifact.uid}" - ) + logger.important( + f"Collection: https://lamin.ai/{slug}/collection/{collection.uid}" + ) run.finished_at = datetime.now(timezone.utc) run.save() return vitessce_config_artifact From 64717fb69b26b2c5b44d465a3675495108c7d855 Mon Sep 17 00:00:00 2001 From: Sunny Sun <38218185+sunnyosun@users.noreply.github.com> Date: Tue, 8 Oct 2024 14:06:42 +0200 Subject: [PATCH 10/14] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Update=20bionty?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 2 +- sub/bionty | 2 +- sub/clinicore | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 58a371806..11c640027 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ Home = "https://github.com/laminlabs/lamindb" [project.optional-dependencies] bionty = [ - "bionty==0.51.1", + "bionty==0.51.2", ] aws = [ "lamindb_setup[aws]", diff --git a/sub/bionty b/sub/bionty index 212184862..fef92d6ae 160000 --- a/sub/bionty +++ b/sub/bionty @@ -1 +1 @@ -Subproject commit 212184862a1a37785853488956450572818239a7 +Subproject commit fef92d6ae1b44ee494fb065338c932c4351ce7ad diff --git a/sub/clinicore b/sub/clinicore index 90d3536ef..18098d7d8 160000 --- a/sub/clinicore +++ b/sub/clinicore @@ -1 +1 @@ -Subproject commit 90d3536ef9cbd1aada3a3690c8e984f05434c9ca +Subproject commit 18098d7d8277a42df56e5c639b94d2d93d739bbf From 2e3ebeac61608f1a83f50583890db23bfec595d7 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Tue, 8 Oct 2024 14:37:43 +0200 Subject: [PATCH 11/14] =?UTF-8?q?=F0=9F=94=96=20Release=200.76.12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lamindb/__init__.py b/lamindb/__init__.py index ec78fa763..c0a2c379d 100644 --- a/lamindb/__init__.py +++ b/lamindb/__init__.py @@ -43,7 +43,7 @@ """ # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc. -__version__ = "0.76.11" +__version__ = "0.76.12" import os as _os From 5abfc412c256fb9a4da89b5414ace57b9dcbd2b8 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Tue, 8 Oct 2024 22:44:07 +0200 Subject: [PATCH 12/14] =?UTF-8?q?=F0=9F=90=9B=20Do=20not=20double=20track?= =?UTF-8?q?=20runs=20(#2032)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/faq/idempotency.ipynb | 15 ++++++++------- lamindb/_artifact.py | 7 ++++--- lamindb/_collection.py | 11 ++++++----- lamindb/core/_label_manager.py | 2 +- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/docs/faq/idempotency.ipynb b/docs/faq/idempotency.ipynb index 06f37aa0c..566d9ec65 100644 --- a/docs/faq/idempotency.ipynb +++ b/docs/faq/idempotency.ipynb @@ -70,7 +70,6 @@ "import lamindb as ln\n", "import pytest\n", "\n", - "ln.settings.verbosity = \"hint\"\n", "ln.track(\"ANW20Fr4eZgM0000\")" ] }, @@ -301,8 +300,7 @@ "metadata": {}, "outputs": [], "source": [ - "artifact = ln.Artifact(filepath, description=\"My fcs artifact\")\n", - "artifact.save()" + "artifact = ln.Artifact(filepath, description=\"My fcs artifact\").save()" ] }, { @@ -316,7 +314,9 @@ }, "outputs": [], "source": [ - "assert artifact.hash == \"KCEXRahJ-Ui9Y6nksQ8z1A\"" + "assert artifact.hash == \"KCEXRahJ-Ui9Y6nksQ8z1A\"\n", + "assert artifact.run == ln.context.run\n", + "assert len(artifact._previous_runs.all()) == 0" ] }, { @@ -355,7 +355,8 @@ "outputs": [], "source": [ "assert artifact.id == artifact2.id\n", - "assert artifact.run == artifact2.run" + "assert artifact.run == artifact2.run\n", + "assert len(artifact._previous_runs.all()) == 0" ] }, { @@ -485,8 +486,7 @@ "metadata": {}, "outputs": [], "source": [ - "artifact4 = ln.Artifact(filepath, description=\"My new fcs artifact\")\n", - "artifact4.save()" + "artifact4 = ln.Artifact(filepath, description=\"My new fcs artifact\").save()" ] }, { @@ -543,6 +543,7 @@ }, "outputs": [], "source": [ + "!rm -rf ./test-idempotency\n", "!lamin delete --force test-idempotency" ] } diff --git a/lamindb/_artifact.py b/lamindb/_artifact.py index 08a750d3d..50279777b 100644 --- a/lamindb/_artifact.py +++ b/lamindb/_artifact.py @@ -331,9 +331,10 @@ def get_artifact_kwargs_from_data( artifact = stat_or_artifact # update the run of the existing artifact if run is not None: - # save the information that this artifact was previously - # produced by another run - if artifact.run is not None: + # save the information that this artifact was previously produced by + # another run + # note: same logic exists for _output_collections_with_later_updates + if artifact.run is not None and artifact.run != run: artifact.run._output_artifacts_with_later_updates.add(artifact) # update the run of the artifact with the latest run stat_or_artifact.run = run diff --git a/lamindb/_collection.py b/lamindb/_collection.py index c1dc0cc92..edebd76a5 100644 --- a/lamindb/_collection.py +++ b/lamindb/_collection.py @@ -145,15 +145,16 @@ def __init__( logger.warning( f"returning existing collection with same hash: {existing_collection}" ) - # update the run of the existing artifact + # update the run of the existing collection if run is not None: - # save the information that this artifact was previously - # produced by another run - if existing_collection.run is not None: + # save the information that this collection was previously produced + # by another run + # note: same logic exists for _output_artifacts_with_later_updates + if existing_collection.run is not None and existing_collection.run != run: existing_collection.run._output_collections_with_later_updates.add( existing_collection ) - # update the run of the artifact with the latest run + # update the run of the collection with the latest run existing_collection.run = run existing_collection.transform = run.transform init_self_from_db(collection, existing_collection) diff --git a/lamindb/core/_label_manager.py b/lamindb/core/_label_manager.py index c14d84e0e..d3c084160 100644 --- a/lamindb/core/_label_manager.py +++ b/lamindb/core/_label_manager.py @@ -39,7 +39,7 @@ def get_labels_as_dict(self: Artifact | Collection, links: bool = False): "links_collection", "links_artifact", "links_feature_set", - "previous_runs", + "_previous_runs", "_feature_values", "_action_targets", "_lnschema_core_collection__actions_+", # something seems off with this one From 13d992c6ed6fbad2902f2db5c0c36c238597d51d Mon Sep 17 00:00:00 2001 From: Sunny Sun <38218185+sunnyosun@users.noreply.github.com> Date: Wed, 9 Oct 2024 10:22:56 +0200 Subject: [PATCH 13/14] =?UTF-8?q?=F0=9F=94=A5=20Remove=20`=5F=5Fgetitem=5F?= =?UTF-8?q?=5F`=20from=20`QuerySet`=20(#2034)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_query_manager.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/lamindb/_query_manager.py b/lamindb/_query_manager.py index 1be3bda57..b02567e73 100644 --- a/lamindb/_query_manager.py +++ b/lamindb/_query_manager.py @@ -98,26 +98,11 @@ def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple: return _lookup(cls=self.all(), field=field, **kwargs) - def __getitem__(self, item: str): - try: - source_field_name = self.source_field_name - target_field_name = self.target_field_name - - if ( - source_field_name in {"artifact", "collection"} - and target_field_name == "feature_set" - ): - return get_feature_set_by_slot_(host=self.instance).get(item) - - except Exception: # pragma: no cover - return - models.Manager.list = QueryManager.list models.Manager.df = QueryManager.df models.Manager.search = QueryManager.search models.Manager.lookup = QueryManager.lookup -models.Manager.__getitem__ = QueryManager.__getitem__ models.Manager._track_run_input_manager = QueryManager._track_run_input_manager # the two lines below would be easy if we could actually inherit; like this, # they're suboptimal From 96d88eccd6b3ccf97417191354acfe57e2496e2f Mon Sep 17 00:00:00 2001 From: Lukas Heumos Date: Wed, 9 Oct 2024 14:31:36 +0200 Subject: [PATCH 14/14] =?UTF-8?q?=F0=9F=90=9BFix=20track=20message=20(#203?= =?UTF-8?q?6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/core/_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lamindb/core/_context.py b/lamindb/core/_context.py index 0aec2390d..d0163fbc1 100644 --- a/lamindb/core/_context.py +++ b/lamindb/core/_context.py @@ -438,7 +438,7 @@ def get_key_clashing_message(transform: Transform, key: str) -> str: ) return ( f'Filename "{key}" clashes with the existing key "{transform.key}" for uid "{transform.uid[:-4]}...."\n\nEither init a new transform with a new uid:\n\n' - f'ln.track("{ids.base62_12()}0000)"\n\n{update_key_note}' + f'ln.track("{ids.base62_12()}0000")\n\n{update_key_note}' ) # make a new transform record