Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…curate_objects_imports
  • Loading branch information
Zethson committed Oct 9, 2024
2 parents b16ed7f + 96d88ec commit 6ea7a97
Show file tree
Hide file tree
Showing 20 changed files with 191 additions and 140 deletions.
15 changes: 8 additions & 7 deletions docs/faq/idempotency.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@
"import lamindb as ln\n",
"import pytest\n",
"\n",
"ln.settings.verbosity = \"hint\"\n",
"ln.track(\"ANW20Fr4eZgM0000\")"
]
},
Expand Down Expand Up @@ -301,8 +300,7 @@
"metadata": {},
"outputs": [],
"source": [
"artifact = ln.Artifact(filepath, description=\"My fcs artifact\")\n",
"artifact.save()"
"artifact = ln.Artifact(filepath, description=\"My fcs artifact\").save()"
]
},
{
Expand All @@ -316,7 +314,9 @@
},
"outputs": [],
"source": [
"assert artifact.hash == \"KCEXRahJ-Ui9Y6nksQ8z1A\""
"assert artifact.hash == \"KCEXRahJ-Ui9Y6nksQ8z1A\"\n",
"assert artifact.run == ln.context.run\n",
"assert len(artifact._previous_runs.all()) == 0"
]
},
{
Expand Down Expand Up @@ -355,7 +355,8 @@
"outputs": [],
"source": [
"assert artifact.id == artifact2.id\n",
"assert artifact.run == artifact2.run"
"assert artifact.run == artifact2.run\n",
"assert len(artifact._previous_runs.all()) == 0"
]
},
{
Expand Down Expand Up @@ -485,8 +486,7 @@
"metadata": {},
"outputs": [],
"source": [
"artifact4 = ln.Artifact(filepath, description=\"My new fcs artifact\")\n",
"artifact4.save()"
"artifact4 = ln.Artifact(filepath, description=\"My new fcs artifact\").save()"
]
},
{
Expand Down Expand Up @@ -543,6 +543,7 @@
},
"outputs": [],
"source": [
"!rm -rf ./test-idempotency\n",
"!lamin delete --force test-idempotency"
]
}
Expand Down
7 changes: 5 additions & 2 deletions docs/scripts/run-track-with-params.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@
params = {
"input_dir": args.input_dir,
"learning_rate": args.learning_rate,
"downsample": args.downsample,
"preprocess_params": {
"downsample": args.downsample,
"normalization": "the_good_one",
},
}
ln.track("JjRF4mACd9m00000", params=params)
ln.track("JjRF4mACd9m00001", params=params)
# your code
ln.finish()
42 changes: 10 additions & 32 deletions docs/storage/vitessce.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
"vc = VitessceConfig(schema_version=\"1.0.15\")\n",
"vc.add_dataset(name=\"test1\").add_object(\n",
" AnnDataWrapper(\n",
" adata_url=dataset_artifact.path.to_url(),\n",
" adata_artifact=dataset_artifact,\n",
" obs_embedding_paths=[\"obsm/X_umap\"],\n",
" ),\n",
")\n",
Expand Down Expand Up @@ -134,39 +134,17 @@
"metadata": {},
"outputs": [],
"source": [
"# use a corrupted suffix\n",
"# pass an artifact URL instead of the artifact object itself\n",
"vc = VitessceConfig(schema_version=\"1.0.15\")\n",
"vc.add_dataset(name=\"test1\").add_object(\n",
" AnnDataWrapper(\n",
" adata_url=dataset_artifact.path.to_url() + \".random\",\n",
" obs_embedding_paths=[\"obsm/X_umap\"],\n",
" ),\n",
")\n",
"with pytest.raises(ValueError) as error:\n",
" ln.integrations.save_vitessce_config(vc)\n",
"print(error.exconly())\n",
"assert error.exconly().startswith(\"ValueError: Suffix should be '.zarr' or one of\")\n",
"assert error.exconly().endswith(\".random\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# use a corrupted stem_uid\n",
"vc = VitessceConfig(schema_version=\"1.0.15\")\n",
"vc.add_dataset(name=\"test1\").add_object(\n",
" AnnDataWrapper(\n",
" adata_url=dataset_artifact.path.to_url().replace(dataset_artifact.stem_uid, \"not-a-valid-uid\"),\n",
" obs_embedding_paths=[\"obsm/X_umap\"],\n",
" ),\n",
")\n",
"with pytest.raises(ValueError) as error:\n",
" ln.integrations.save_vitessce_config(vc)\n",
"with pytest.raises(AttributeError) as error:\n",
" vc.add_dataset(name=\"test1\").add_object(\n",
" AnnDataWrapper(\n",
" adata_artifact=dataset_artifact.path.to_url(),\n",
" obs_embedding_paths=[\"obsm/X_umap\"],\n",
" ),\n",
" )\n",
"print(error.exconly())\n",
"assert error.exconly().startswith(\"ValueError: Could not find dataset with stem uid 'not-a-valid-uid' in lamindb\")"
"assert error.exconly().startswith(\"AttributeError: 'str' object has no attribute 'path'\")\n"
]
},
{
Expand Down
49 changes: 45 additions & 4 deletions docs/track.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"(track-run-parameters)=\n",
"\n",
"## Track run parameters"
]
},
Expand All @@ -165,7 +167,7 @@
"\n",
"ln.Param(name=\"input_dir\", dtype=\"str\").save()\n",
"ln.Param(name=\"learning_rate\", dtype=\"float\").save()\n",
"ln.Param(name=\"downsample\", dtype=\"bool\").save()"
"ln.Param(name=\"preprocess_params\", dtype=\"dict\").save()"
]
},
{
Expand Down Expand Up @@ -223,14 +225,24 @@
},
"outputs": [],
"source": [
"ln.Run.params.filter(learning_rate=0.01, input_dir=\"./mydataset\").df()"
"ln.Run.params.filter(learning_rate=0.01, input_dir=\"./mydataset\", preprocess_params__downsample=True).df()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Look at the parameter values that were used for a given run."
"Note that:\n",
"\n",
"* `preprocess_params__downsample=True` traverses the dictionary `preprocess_params` to find the key `\"downsample\"` and match it to `True`\n",
"* nested keys like `\"downsample\"` in a dictionary do not appear in `Param` and hence do not get validated"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below is how you get the parameter values that were used for a given run."
]
},
{
Expand All @@ -247,6 +259,35 @@
"run.params.get_values()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or [on the hub](https://lamin.ai/laminlabs/lamindata/transform/JjRF4mACd9m00001).\n",
"\n",
"<img width=\"500\" alt=\"image\" src=\"https://github.com/user-attachments/assets/d8a5df37-d585-4940-b6f0-91f99b6c436c\">"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to query all parameter values across all runs, use {class}`~lamindb.core.ParamValue`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"hide-output"
]
},
"outputs": [],
"source": [
"ln.core.ParamValue.df(include=[\"param__name\", \"created_by__handle\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -297,7 +338,7 @@
},
"outputs": [],
"source": [
"assert run.params.get_values() == {'downsample': True, 'input_dir': './mydataset', 'learning_rate': 0.01}\n",
"assert run.params.get_values() == {'input_dir': './mydataset', 'learning_rate': 0.01, 'preprocess_params': {'downsample': True, 'normalization': 'the_good_one'}}\n",
"\n",
"# clean up test instance\n",
"!rm -r ./test-track\n",
Expand Down
2 changes: 1 addition & 1 deletion lamindb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"""

# denote a pre-release of 0.1.0 with 0.1a1 (alpha), 0.1b1 (beta), 0.1rc1 (release candidate), etc.
__version__ = "0.76.11"
__version__ = "0.76.12"

import os as _os

Expand Down
7 changes: 4 additions & 3 deletions lamindb/_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,10 @@ def get_artifact_kwargs_from_data(
artifact = stat_or_artifact
# update the run of the existing artifact
if run is not None:
# save the information that this artifact was previously
# produced by another run
if artifact.run is not None:
# save the information that this artifact was previously produced by
# another run
# note: same logic exists for _output_collections_with_later_updates
if artifact.run is not None and artifact.run != run:
artifact.run._output_artifacts_with_later_updates.add(artifact)
# update the run of the artifact with the latest run
stat_or_artifact.run = run
Expand Down
11 changes: 6 additions & 5 deletions lamindb/_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,16 @@ def __init__(
logger.warning(
f"returning existing collection with same hash: {existing_collection}"
)
# update the run of the existing artifact
# update the run of the existing collection
if run is not None:
# save the information that this artifact was previously
# produced by another run
if existing_collection.run is not None:
# save the information that this collection was previously produced
# by another run
# note: same logic exists for _output_artifacts_with_later_updates
if existing_collection.run is not None and existing_collection.run != run:
existing_collection.run._output_collections_with_later_updates.add(
existing_collection
)
# update the run of the artifact with the latest run
# update the run of the collection with the latest run
existing_collection.run = run
existing_collection.transform = run.transform
init_self_from_db(collection, existing_collection)
Expand Down
15 changes: 0 additions & 15 deletions lamindb/_query_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,26 +98,11 @@ def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:

return _lookup(cls=self.all(), field=field, **kwargs)

def __getitem__(self, item: str):
    """Look up ``item`` among the feature sets of the manager's host record.

    The lookup only happens when the manager's ``source_field_name`` is
    ``"artifact"`` or ``"collection"`` and its ``target_field_name`` is
    ``"feature_set"``; the result is then
    ``get_feature_set_by_slot_(host=self.instance).get(item)``.

    Returns ``None`` for any other manager, and also whenever anything in
    the lookup raises (errors are deliberately swallowed).
    """
    try:
        # read both field names up front; a manager lacking either attribute
        # raises here and falls through to the ``except`` branch
        source_name = self.source_field_name
        target_name = self.target_field_name

        host_sources = {"artifact", "collection"}
        if source_name in host_sources:
            if target_name == "feature_set":
                feature_sets = get_feature_set_by_slot_(host=self.instance)
                return feature_sets.get(item)
        return None
    except Exception:  # pragma: no cover
        # best-effort accessor: never propagate, just signal "nothing found"
        return None


models.Manager.list = QueryManager.list
models.Manager.df = QueryManager.df
models.Manager.search = QueryManager.search
models.Manager.lookup = QueryManager.lookup
models.Manager.__getitem__ = QueryManager.__getitem__
models.Manager._track_run_input_manager = QueryManager._track_run_input_manager
# the two lines below would be easy if we could actually inherit; like this,
# they're suboptimal
Expand Down
2 changes: 1 addition & 1 deletion lamindb/_query_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def df(
for field in self.model._meta.fields
if isinstance(field, models.ForeignKey)
]
for field_name in ["run_id", "updated_at", "created_by_id", "updated_at"]:
for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
if field_name in field_names:
field_names.remove(field_name)
field_names.append(field_name)
Expand Down
4 changes: 2 additions & 2 deletions lamindb/_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
update_db_using_local,
)
from lamindb_setup.core._docs import doc_args
from lamindb_setup.core._hub_core import connect_instance
from lamindb_setup.core._hub_core import connect_instance_hub
from lamindb_setup.core._settings_store import instance_settings_file
from lnschema_core.models import IsVersioned, Record, Run, Transform

Expand Down Expand Up @@ -382,7 +382,7 @@ def using(
settings_file = instance_settings_file(name, owner)
cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
if not settings_file.exists():
result = connect_instance(owner=owner, name=name)
result = connect_instance_hub(owner=owner, name=name)
if isinstance(result, str):
raise RuntimeError(
f"Failed to load instance {instance}, please check your permissions!"
Expand Down
4 changes: 2 additions & 2 deletions lamindb/core/_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def get_key_clashing_message(transform: Transform, key: str) -> str:
)
return (
f'Filename "{key}" clashes with the existing key "{transform.key}" for uid "{transform.uid[:-4]}...."\n\nEither init a new transform with a new uid:\n\n'
f'ln.track("{ids.base62_12()}0000)"\n\n{update_key_note}'
f'ln.track("{ids.base62_12()}0000")\n\n{update_key_note}'
)

# make a new transform record
Expand Down Expand Up @@ -577,7 +577,7 @@ def get_shortcut() -> str:
import nbproject

# it might be that the user modifies the title just before ln.finish()
if nbproject_title := nbproject.meta.live.title != self.transform.name:
if (nbproject_title := nbproject.meta.live.title) != self.transform.name:
self.transform.name = nbproject_title
self.transform.save()
if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
Expand Down
Loading

0 comments on commit 6ea7a97

Please sign in to comment.