Merge branch 'main' of https://github.com/laminlabs/lamindb into fix/curate_objects_imports
laminlabs · Oct 9, 2024 · 6ea7a97 · 6ea7a97
2 parents b16ed7f + 96d88ec
commit 6ea7a97
Show file tree

Hide file tree

Showing 20 changed files with 191 additions and 140 deletions.
diff --git a/docs/faq/idempotency.ipynb b/docs/faq/idempotency.ipynb
@@ -70,7 +70,6 @@
     "import lamindb as ln\n",
     "import pytest\n",
     "\n",
-    "ln.settings.verbosity = \"hint\"\n",
     "ln.track(\"ANW20Fr4eZgM0000\")"
    ]
   },
@@ -301,8 +300,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "artifact = ln.Artifact(filepath, description=\"My fcs artifact\")\n",
-    "artifact.save()"
+    "artifact = ln.Artifact(filepath, description=\"My fcs artifact\").save()"
    ]
   },
   {
@@ -316,7 +314,9 @@
    },
    "outputs": [],
    "source": [
-    "assert artifact.hash == \"KCEXRahJ-Ui9Y6nksQ8z1A\""
+    "assert artifact.hash == \"KCEXRahJ-Ui9Y6nksQ8z1A\"\n",
+    "assert artifact.run == ln.context.run\n",
+    "assert len(artifact._previous_runs.all()) == 0"
    ]
   },
   {
@@ -355,7 +355,8 @@
    "outputs": [],
    "source": [
     "assert artifact.id == artifact2.id\n",
-    "assert artifact.run == artifact2.run"
+    "assert artifact.run == artifact2.run\n",
+    "assert len(artifact._previous_runs.all()) == 0"
    ]
   },
   {
@@ -485,8 +486,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "artifact4 = ln.Artifact(filepath, description=\"My new fcs artifact\")\n",
-    "artifact4.save()"
+    "artifact4 = ln.Artifact(filepath, description=\"My new fcs artifact\").save()"
    ]
   },
   {
@@ -543,6 +543,7 @@
    },
    "outputs": [],
    "source": [
+    "!rm -rf ./test-idempotency\n",
     "!lamin delete --force test-idempotency"
    ]
   }

diff --git a/docs/scripts/run-track-with-params.py b/docs/scripts/run-track-with-params.py
@@ -10,8 +10,11 @@
     params = {
         "input_dir": args.input_dir,
         "learning_rate": args.learning_rate,
-        "downsample": args.downsample,
+        "preprocess_params": {
+            "downsample": args.downsample,
+            "normalization": "the_good_one",
+        },
     }
-    ln.track("JjRF4mACd9m00000", params=params)
+    ln.track("JjRF4mACd9m00001", params=params)
     # your code
     ln.finish()
diff --git a/docs/storage/vitessce.ipynb b/docs/storage/vitessce.ipynb
@@ -82,7 +82,7 @@
     "vc = VitessceConfig(schema_version=\"1.0.15\")\n",
     "vc.add_dataset(name=\"test1\").add_object(\n",
     "    AnnDataWrapper(\n",
-    "        adata_url=dataset_artifact.path.to_url(),\n",
+    "        adata_artifact=dataset_artifact,\n",
     "        obs_embedding_paths=[\"obsm/X_umap\"],\n",
     "    ),\n",
     ")\n",
@@ -134,39 +134,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# use a corrupted suffix\n",
+    "# pass an artifact URL instead of the artifact object itself\n",
     "vc = VitessceConfig(schema_version=\"1.0.15\")\n",
-    "vc.add_dataset(name=\"test1\").add_object(\n",
-    "    AnnDataWrapper(\n",
-    "        adata_url=dataset_artifact.path.to_url() + \".random\",\n",
-    "        obs_embedding_paths=[\"obsm/X_umap\"],\n",
-    "    ),\n",
-    ")\n",
-    "with pytest.raises(ValueError) as error:\n",
-    "    ln.integrations.save_vitessce_config(vc)\n",
-    "print(error.exconly())\n",
-    "assert error.exconly().startswith(\"ValueError: Suffix should be '.zarr' or one of\")\n",
-    "assert error.exconly().endswith(\".random\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# use a corrupted stem_uid\n",
-    "vc = VitessceConfig(schema_version=\"1.0.15\")\n",
-    "vc.add_dataset(name=\"test1\").add_object(\n",
-    "    AnnDataWrapper(\n",
-    "        adata_url=dataset_artifact.path.to_url().replace(dataset_artifact.stem_uid, \"not-a-valid-uid\"),\n",
-    "        obs_embedding_paths=[\"obsm/X_umap\"],\n",
-    "    ),\n",
-    ")\n",
-    "with pytest.raises(ValueError) as error:\n",
-    "    ln.integrations.save_vitessce_config(vc)\n",
+    "with pytest.raises(AttributeError) as error:\n",
+    "    vc.add_dataset(name=\"test1\").add_object(\n",
+    "        AnnDataWrapper(\n",
+    "            adata_artifact=dataset_artifact.path.to_url(),\n",
+    "            obs_embedding_paths=[\"obsm/X_umap\"],\n",
+    "        ),\n",
+    "    )\n",
     "print(error.exconly())\n",
-    "assert error.exconly().startswith(\"ValueError: Could not find dataset with stem uid 'not-a-valid-uid' in lamindb\")"
+    "assert error.exconly().startswith(\"AttributeError: 'str' object has no attribute 'path'\")\n"
    ]
   },
   {

diff --git a/docs/track.ipynb b/docs/track.ipynb
@@ -141,6 +141,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "(track-run-parameters)=\n",
+    "\n",
     "## Track run parameters"
    ]
   },
@@ -165,7 +167,7 @@
     "\n",
     "ln.Param(name=\"input_dir\", dtype=\"str\").save()\n",
     "ln.Param(name=\"learning_rate\", dtype=\"float\").save()\n",
-    "ln.Param(name=\"downsample\", dtype=\"bool\").save()"
+    "ln.Param(name=\"preprocess_params\", dtype=\"dict\").save()"
    ]
   },
   {
@@ -223,14 +225,24 @@
    },
    "outputs": [],
    "source": [
-    "ln.Run.params.filter(learning_rate=0.01, input_dir=\"./mydataset\").df()"
+    "ln.Run.params.filter(learning_rate=0.01, input_dir=\"./mydataset\", preprocess_params__downsample=True).df()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Look at the parameter values that were used for a given run."
+    "Note that:\n",
+    "\n",
+    "* `preprocess_params__downsample=True` traverses the dictionary `preprocess_params` to find the key `\"downsample\"` and match it to `True`\n",
+    "* nested keys like `\"downsample\"` in a dictionary do not appear in `Param` and hence, do not get validated"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Below is how you get the parameter values that were used for a given run."
    ]
   },
   {
@@ -247,6 +259,35 @@
     "run.params.get_values()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Or [on the hub](https://lamin.ai/laminlabs/lamindata/transform/JjRF4mACd9m00001).\n",
+    "\n",
+    "<img width=\"500\" alt=\"image\" src=\"https://github.com/user-attachments/assets/d8a5df37-d585-4940-b6f0-91f99b6c436c\">"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you want to query all parameter values across all runs, use {class}`~lamindb.core.ParamValue`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "hide-output"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "ln.core.ParamValue.df(include=[\"param__name\", \"created_by__handle\"])"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -297,7 +338,7 @@
    },
    "outputs": [],
    "source": [
-    "assert run.params.get_values() == {'downsample': True, 'input_dir': './mydataset', 'learning_rate': 0.01}\n",
+    "assert run.params.get_values() == {'input_dir': './mydataset', 'learning_rate': 0.01, 'preprocess_params': {'downsample': True, 'normalization': 'the_good_one'}}\n",
     "\n",
     "# clean up test instance\n",
     "!rm -r ./test-track\n",

diff --git a/lamindb/__init__.py b/lamindb/__init__.py
@@ -43,7 +43,7 @@
 """
 
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "0.76.11"
+__version__ = "0.76.12"
 
 import os as _os
 

diff --git a/lamindb/_artifact.py b/lamindb/_artifact.py
@@ -331,9 +331,10 @@ def get_artifact_kwargs_from_data(
         artifact = stat_or_artifact
         # update the run of the existing artifact
         if run is not None:
-            # save the information that this artifact was previously
-            # produced by another run
-            if artifact.run is not None:
+            # save the information that this artifact was previously produced by
+            # another run
+            # note: same logic exists for _output_collections_with_later_updates
+            if artifact.run is not None and artifact.run != run:
                 artifact.run._output_artifacts_with_later_updates.add(artifact)
             # update the run of the artifact with the latest run
             stat_or_artifact.run = run

diff --git a/lamindb/_collection.py b/lamindb/_collection.py
@@ -145,15 +145,16 @@ def __init__(
         logger.warning(
             f"returning existing collection with same hash: {existing_collection}"
         )
-        # update the run of the existing artifact
+        # update the run of the existing collection
         if run is not None:
-            # save the information that this artifact was previously
-            # produced by another run
-            if existing_collection.run is not None:
+            # save the information that this collection was previously produced
+            # by another run
+            # note: same logic exists for _output_artifacts_with_later_updates
+            if existing_collection.run is not None and existing_collection.run != run:
                 existing_collection.run._output_collections_with_later_updates.add(
                     existing_collection
                 )
-            # update the run of the artifact with the latest run
+            # update the run of the collection with the latest run
             existing_collection.run = run
             existing_collection.transform = run.transform
         init_self_from_db(collection, existing_collection)

diff --git a/lamindb/_query_manager.py b/lamindb/_query_manager.py
@@ -98,26 +98,11 @@ def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
 
         return _lookup(cls=self.all(), field=field, **kwargs)
 
-    def __getitem__(self, item: str):
-        try:
-            source_field_name = self.source_field_name
-            target_field_name = self.target_field_name
-
-            if (
-                source_field_name in {"artifact", "collection"}
-                and target_field_name == "feature_set"
-            ):
-                return get_feature_set_by_slot_(host=self.instance).get(item)
-
-        except Exception:  # pragma: no cover
-            return
-
 
 models.Manager.list = QueryManager.list
 models.Manager.df = QueryManager.df
 models.Manager.search = QueryManager.search
 models.Manager.lookup = QueryManager.lookup
-models.Manager.__getitem__ = QueryManager.__getitem__
 models.Manager._track_run_input_manager = QueryManager._track_run_input_manager
 # the two lines below would be easy if we could actually inherit; like this,
 # they're suboptimal

diff --git a/lamindb/_query_set.py b/lamindb/_query_set.py
@@ -168,7 +168,7 @@ def df(
             for field in self.model._meta.fields
             if isinstance(field, models.ForeignKey)
         ]
-        for field_name in ["run_id", "updated_at", "created_by_id", "updated_at"]:
+        for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
             if field_name in field_names:
                 field_names.remove(field_name)
                 field_names.append(field_name)

diff --git a/lamindb/_record.py b/lamindb/_record.py
@@ -15,7 +15,7 @@
     update_db_using_local,
 )
 from lamindb_setup.core._docs import doc_args
-from lamindb_setup.core._hub_core import connect_instance
+from lamindb_setup.core._hub_core import connect_instance_hub
 from lamindb_setup.core._settings_store import instance_settings_file
 from lnschema_core.models import IsVersioned, Record, Run, Transform
 
@@ -382,7 +382,7 @@ def using(
     settings_file = instance_settings_file(name, owner)
     cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
     if not settings_file.exists():
-        result = connect_instance(owner=owner, name=name)
+        result = connect_instance_hub(owner=owner, name=name)
         if isinstance(result, str):
             raise RuntimeError(
                 f"Failed to load instance {instance}, please check your permissions!"

diff --git a/lamindb/core/_context.py b/lamindb/core/_context.py
@@ -438,7 +438,7 @@ def get_key_clashing_message(transform: Transform, key: str) -> str:
             )
             return (
                 f'Filename "{key}" clashes with the existing key "{transform.key}" for uid "{transform.uid[:-4]}...."\n\nEither init a new transform with a new uid:\n\n'
-                f'ln.track("{ids.base62_12()}0000)"\n\n{update_key_note}'
+                f'ln.track("{ids.base62_12()}0000")\n\n{update_key_note}'
             )
 
         # make a new transform record
@@ -577,7 +577,7 @@ def get_shortcut() -> str:
             import nbproject
 
             # it might be that the user modifies the title just before ln.finish()
-            if nbproject_title := nbproject.meta.live.title != self.transform.name:
+            if (nbproject_title := nbproject.meta.live.title) != self.transform.name:
                 self.transform.name = nbproject_title
                 self.transform.save()
             if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING: