From 9070cad6c2076196079bf4020a3ad2379bda310c Mon Sep 17 00:00:00 2001
From: Sergei Rybakov
Date: Wed, 2 Oct 2024 12:43:22 +0200
Subject: [PATCH] ♻️ Move lamindb_setup.settings.storage.cache_dir to
 lamindb_setup.settings.cache_dir (#2013)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/setup.ipynb                           | 52 ++++++++++++-------
 .../prepare-transfer-local-to-cloud.ipynb  |  2 +-
 lamindb/_artifact.py                       |  6 +--
 lamindb/_finish.py                         |  6 +--
 lamindb/_record.py                         |  8 +--
 lamindb/_save.py                           |  2 +-
 lamindb/core/_settings.py                  |  5 ++
 lamindb/core/_sync_git.py                  |  4 +-
 lamindb/core/_track_environment.py         |  2 +-
 lamindb/integrations/_vitessce.py          |  4 +-
 noxfile.py                                 |  2 +-
 sub/lamin-cli                              |  2 +-
 sub/lamindb-setup                          |  2 +-
 tests/core/test_cache.py                   | 12 ++---
 14 files changed, 61 insertions(+), 48 deletions(-)

diff --git a/docs/setup.ipynb b/docs/setup.ipynb
index 47a91d84a..4255f19c6 100644
--- a/docs/setup.ipynb
+++ b/docs/setup.ipynb
@@ -254,25 +254,39 @@
    "source": [
     "`lamindb` maintains a cache for cloud instances, i.e. instances having storage set to an AWS S3 bucket.\n",
     "\n",
-    "Cache directory can be accessed via {class}`lamindb.settings`.\n",
-    "\n",
-    "```\n",
-    "ln.settings.storage.cache_dir\n",
-    "```\n",
-    "\n",
-    "or print the cache directory path with CLI\n",
-    "\n",
-    "```\n",
-    "lamin cache get\n",
-    "```\n",
-    "\n",
-    "It can be configured using the settings\n",
-    "\n",
-    "```\n",
-    "ln.settings.storage.cache_dir = \"some/path/to/cache\"\n",
-    "```\n",
-    "\n",
-    "or using CLI\n",
+    "The cache directory can be accessed via {class}`lamindb.settings`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ln.settings.cache_dir"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "or print the cache directory path with the CLI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!lamin cache get"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "It can also be configured via\n",
     "\n",
     "```\n",
     "lamin cache set some/path/to/cache\n",
diff --git a/docs/storage/prepare-transfer-local-to-cloud.ipynb b/docs/storage/prepare-transfer-local-to-cloud.ipynb
index bfd3e741a..7cb0d592a 100644
--- a/docs/storage/prepare-transfer-local-to-cloud.ipynb
+++ b/docs/storage/prepare-transfer-local-to-cloud.ipynb
@@ -34,7 +34,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ln.setup.init(storage=\"./test-transfer-to-cloud\", schema=\"bionty,wetlab\")\n",
+    "ln.setup.init(storage=\"./test-transfer-to-cloud\", schema=\"bionty,wetlab,findrefs\")\n",
     "ln.setup.settings.auto_connect = False"
    ]
   },
diff --git a/lamindb/_artifact.py b/lamindb/_artifact.py
index f8d48fe12..43f10005f 100644
--- a/lamindb/_artifact.py
+++ b/lamindb/_artifact.py
@@ -180,7 +180,7 @@ def process_data(
                 f" be '{suffix}'."
) cache_name = f"{provisional_uid}{suffix}" - path = settings.storage.cache_dir / cache_name + path = settings.cache_dir / cache_name # Alex: I don't understand the line below if path.suffixes == []: path = path.with_suffix(suffix) @@ -344,7 +344,7 @@ def get_artifact_kwargs_from_data( if revises is not None: # update provisional_uid provisional_uid, revises = create_uid(revises=revises, version=version) - if settings.storage.cache_dir in path.parents: + if settings.cache_dir in path.parents: path = path.rename(path.with_name(f"{provisional_uid}{suffix}")) check_path_in_storage = False @@ -1125,7 +1125,7 @@ def save(self, upload: bool | None = None, **kwargs) -> Artifact: raise RuntimeError(exception) if local_path is not None and not state_was_adding: # only move the local artifact to cache if it was not newly created - local_path_cache = ln_setup.settings.storage.cache_dir / local_path.name + local_path_cache = ln_setup.settings.cache_dir / local_path.name # don't use Path.rename here because of cross-device link error # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969 shutil.move( diff --git a/lamindb/_finish.py b/lamindb/_finish.py index f067ff9db..64b8cdcec 100644 --- a/lamindb/_finish.py +++ b/lamindb/_finish.py @@ -130,12 +130,12 @@ def save_context_core( if response != "y": return "aborted-non-consecutive" # write the report - report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace( + report_path = ln_setup.settings.cache_dir / filepath.name.replace( ".ipynb", ".html" ) notebook_to_report(filepath, report_path) # write the source code - source_code_path = ln_setup.settings.storage.cache_dir / filepath.name.replace( + source_code_path = ln_setup.settings.cache_dir / filepath.name.replace( ".ipynb", ".py" ) notebook_to_script(transform, filepath, source_code_path) @@ -171,7 +171,7 @@ def save_context_core( transform.hash = hash # track environment - env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt" + env_path = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt" if env_path.exists(): overwrite_env = True if run.environment_id is not None and from_cli: diff --git a/lamindb/_record.py b/lamindb/_record.py index b79fb2d2a..69116e268 100644 --- a/lamindb/_record.py +++ b/lamindb/_record.py @@ -380,9 +380,7 @@ def using( return QuerySet(model=cls, using=None) owner, name = get_owner_name_from_identifier(instance) settings_file = instance_settings_file(name, owner) - cache_filepath = ( - ln_setup.settings.storage.cache_dir / f"instance--{owner}--{name}--uid.txt" - ) + cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt" if not settings_file.exists(): result = connect_instance(owner=owner, name=name) if isinstance(result, str): @@ -469,9 +467,7 @@ def get_transfer_run(record) -> Run: slug = record._state.db owner, name = get_owner_name_from_identifier(slug) - cache_filepath = ( - ln_setup.settings.storage.cache_dir / f"instance--{owner}--{name}--uid.txt" - ) + cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt" if not cache_filepath.exists(): raise SystemExit("Need to call .using() before") instance_uid = cache_filepath.read_text() diff --git a/lamindb/_save.py b/lamindb/_save.py index 26a787c8d..a0cce6f3d 100644 --- a/lamindb/_save.py +++ b/lamindb/_save.py @@ -168,7 +168,7 @@ def copy_or_move_to_cache( local_path = local_path.resolve() is_dir = local_path.is_dir() - cache_dir = settings._storage_settings.cache_dir + cache_dir = settings.cache_dir # just 
delete from the cache dir if storage_path is local if cache_path is None: diff --git a/lamindb/core/_settings.py b/lamindb/core/_settings.py index 9c00834ed..28d430578 100644 --- a/lamindb/core/_settings.py +++ b/lamindb/core/_settings.py @@ -143,6 +143,11 @@ def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]) path, kwargs = path_kwargs, {} set_managed_storage(path, **kwargs) + @property + def cache_dir(self) -> UPath: + """Cache root, a local directory to cache cloud files.""" + return ln_setup.settings.cache_dir + @property def storage_local(self) -> StorageSettings: """An additional local default storage (a path to its root). diff --git a/lamindb/core/_sync_git.py b/lamindb/core/_sync_git.py index affc9c03b..a52e7b17f 100644 --- a/lamindb/core/_sync_git.py +++ b/lamindb/core/_sync_git.py @@ -16,7 +16,7 @@ class BlobHashNotFound(SystemExit): def get_git_repo_from_remote() -> Path: repo_url = settings.sync_git_repo - repo_dir = setup_settings.storage.cache_dir / repo_url.split("/")[-1] + repo_dir = setup_settings.cache_dir / repo_url.split("/")[-1] if repo_dir.exists(): logger.warning(f"git repo {repo_dir} already exists locally") return repo_dir @@ -26,7 +26,7 @@ def get_git_repo_from_remote() -> Path: result = subprocess.run( ["git", "clone", "--depth", "10", f"{repo_url}.git"], capture_output=True, - cwd=setup_settings.storage.cache_dir, + cwd=setup_settings.cache_dir, ) if result.returncode != 0 or not repo_dir.exists(): raise RuntimeError(result.stderr.decode()) diff --git a/lamindb/core/_track_environment.py b/lamindb/core/_track_environment.py index 197408072..4548506a0 100644 --- a/lamindb/core/_track_environment.py +++ b/lamindb/core/_track_environment.py @@ -11,7 +11,7 @@ def track_environment(run: Run) -> None: - filepath = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt" + filepath = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt" # create a requirements.txt # we don't create a conda environment.yml mostly for its slowness try: diff --git a/lamindb/integrations/_vitessce.py b/lamindb/integrations/_vitessce.py index 95872bb52..51ed457d1 100644 --- a/lamindb/integrations/_vitessce.py +++ b/lamindb/integrations/_vitessce.py @@ -88,9 +88,7 @@ def save_vitessce_config( raise NotImplementedError run.input_artifacts.set(dataset_artifacts) # create a JSON export - config_file_local_path = ( - ln_setup.settings.storage.cache_dir / "config.vitessce.json" - ) + config_file_local_path = ln_setup.settings.cache_dir / "config.vitessce.json" with open(config_file_local_path, "w") as file: json.dump(vc_dict, file) vitessce_config_artifact = Artifact( diff --git a/noxfile.py b/noxfile.py index b7f5ff112..e5a8d8ceb 100644 --- a/noxfile.py +++ b/noxfile.py @@ -109,7 +109,7 @@ def install_ci(session, group): extras += "aws,zarr,bionty,jupyter" run( session, - "uv pip install --system --no-deps ./sub/wetlab", + "uv pip install --system --no-deps ./sub/wetlab ./sub/findrefs", ) run(session, "uv pip install --system vitessce") elif group == "docs": diff --git a/sub/lamin-cli b/sub/lamin-cli index bf4e24a17..8ed07bafd 160000 --- a/sub/lamin-cli +++ b/sub/lamin-cli @@ -1 +1 @@ -Subproject commit bf4e24a179342d955c7a4ae4678c17ed078ca2e6 +Subproject commit 8ed07bafd8e955b1a36b1b606c56ed4b5fa0a1e3 diff --git a/sub/lamindb-setup b/sub/lamindb-setup index b35835b8f..9df148619 160000 --- a/sub/lamindb-setup +++ b/sub/lamindb-setup @@ -1 +1 @@ -Subproject commit b35835b8ffecd56dad2f24749b3ed3ea71cf8623 +Subproject commit 
9df148619c96620b71a9fb6b73f577d2073eff26 diff --git a/tests/core/test_cache.py b/tests/core/test_cache.py index 85882baba..c20296104 100644 --- a/tests/core/test_cache.py +++ b/tests/core/test_cache.py @@ -29,7 +29,7 @@ def test_local_cache(): artifact = ln.Artifact.from_anndata(adata, key="test_cache.h5ad") temp_path = artifact._local_filepath.resolve() assert temp_path.exists() - assert ln.setup.settings.storage.cache_dir in temp_path.parents + assert ln.setup.settings.cache_dir in temp_path.parents artifact.save() assert artifact.path.exists() @@ -52,7 +52,7 @@ def test_local_cache(): artifact.delete(permanent=True) # check directories in cache - cache_dir = ln.setup.settings.storage.cache_dir + cache_dir = ln.setup.settings.cache_dir adata_zarr_pth = cache_dir / "test_adata.zarr" adata.write_zarr(adata_zarr_pth) @@ -71,7 +71,7 @@ def test_cloud_cache(switch_storage): # check that we have cloud storage assert ln.setup.settings.storage.root_as_str == switch_storage - cache_dir = ln.setup.settings.storage.cache_dir + cache_dir = ln.setup.settings.cache_dir assert cache_dir is not None test_file = ln.core.datasets.anndata_file_pbmc68k_test() @@ -142,11 +142,11 @@ def test_cloud_cache(switch_storage): def test_cloud_cache_versions(switch_storage): adata = load_h5ad(ln.core.datasets.anndata_file_pbmc68k_test()) - cache_dir = ln.setup.settings.storage.cache_dir + cache_dir = ln.setup.settings.cache_dir assert cache_dir is not None artifact = ln.Artifact.from_anndata(adata, key="test_cache.h5ad") - assert ln.settings.storage.cache_dir in artifact._local_filepath.parents + assert ln.settings.cache_dir in artifact._local_filepath.parents artifact.save() cache_path_v1 = artifact.cache() assert cache_path_v1.exists() @@ -169,7 +169,7 @@ def test_cloud_cache_versions(switch_storage): artifact_v2 = ln.Artifact.from_anndata( adata, key="test_cache.h5ad", revises=artifact ) - assert ln.settings.storage.cache_dir in artifact_v2._local_filepath.parents + assert ln.settings.cache_dir in artifact_v2._local_filepath.parents artifact_v2.save() assert artifact_v2.is_latest assert not artifact.is_latest
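
For downstream code, the change is a one-line rename: anything that previously read `ln.settings.storage.cache_dir` (or `ln_setup.settings.storage.cache_dir`) now reads `ln.settings.cache_dir`. Below is a minimal sketch of the new access pattern, assuming a lamindb installation that includes this patch and an already-connected instance; the `example.h5ad` file name is purely illustrative and not part of the patch.

```python
import lamindb as ln

# New location of the cache root (was ln.settings.storage.cache_dir).
cache_dir = ln.settings.cache_dir
print(cache_dir)

# The setup-level settings expose the same directory; this is what the
# internals and the updated tests in this patch use.
assert ln.setup.settings.cache_dir == cache_dir

# Typical check mirrored from the updated tests: does a path live inside the cache?
candidate = cache_dir / "example.h5ad"  # hypothetical file name
print(cache_dir in candidate.parents)
```

The CLI equivalents shown in the updated docs are `lamin cache get` to print the cache directory and `lamin cache set some/path/to/cache` to relocate it.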