Skip to content

Commit

Permalink
♻️ Move lamindb_setup.settings.storage.cache_dir to lamindb_setup.set…
Browse files Browse the repository at this point in the history
…tings.cache_dir (#2013)
  • Loading branch information
Koncopd authored Oct 2, 2024
1 parent 254027f commit 9070cad
Show file tree
Hide file tree
Showing 14 changed files with 61 additions and 48 deletions.
52 changes: 33 additions & 19 deletions docs/setup.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -254,25 +254,39 @@
"source": [
"`lamindb` maintains a cache for cloud instances, i.e. instances having storage set to an AWS S3 bucket.\n",
"\n",
"Cache directory can be accessed via {class}`lamindb.settings`.\n",
"\n",
"```\n",
"ln.settings.storage.cache_dir\n",
"```\n",
"\n",
"or print the cache directory path with CLI\n",
"\n",
"```\n",
"lamin cache get\n",
"```\n",
"\n",
"It can be configured using the settings\n",
"\n",
"```\n",
"ln.settings.storage.cache_dir = \"some/path/to/cache\"\n",
"```\n",
"\n",
"or using CLI\n",
"Cache directory can be accessed via {class}`lamindb.settings`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ln.settings.cache_dir"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"or print the cache directory path with CLI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!lamin cache get"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It can be also configured via \n",
"\n",
"```\n",
"lamin cache set some/path/to/cache\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/storage/prepare-transfer-local-to-cloud.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"metadata": {},
"outputs": [],
"source": [
"ln.setup.init(storage=\"./test-transfer-to-cloud\", schema=\"bionty,wetlab\")\n",
"ln.setup.init(storage=\"./test-transfer-to-cloud\", schema=\"bionty,wetlab,findrefs\")\n",
"ln.setup.settings.auto_connect = False"
]
},
Expand Down
6 changes: 3 additions & 3 deletions lamindb/_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def process_data(
f" be '{suffix}'."
)
cache_name = f"{provisional_uid}{suffix}"
path = settings.storage.cache_dir / cache_name
path = settings.cache_dir / cache_name
# Alex: I don't understand the line below
if path.suffixes == []:
path = path.with_suffix(suffix)
Expand Down Expand Up @@ -344,7 +344,7 @@ def get_artifact_kwargs_from_data(

if revises is not None: # update provisional_uid
provisional_uid, revises = create_uid(revises=revises, version=version)
if settings.storage.cache_dir in path.parents:
if settings.cache_dir in path.parents:
path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))

check_path_in_storage = False
Expand Down Expand Up @@ -1125,7 +1125,7 @@ def save(self, upload: bool | None = None, **kwargs) -> Artifact:
raise RuntimeError(exception)
if local_path is not None and not state_was_adding:
# only move the local artifact to cache if it was not newly created
local_path_cache = ln_setup.settings.storage.cache_dir / local_path.name
local_path_cache = ln_setup.settings.cache_dir / local_path.name
# don't use Path.rename here because of cross-device link error
# https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
shutil.move(
Expand Down
6 changes: 3 additions & 3 deletions lamindb/_finish.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,12 @@ def save_context_core(
if response != "y":
return "aborted-non-consecutive"
# write the report
report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
report_path = ln_setup.settings.cache_dir / filepath.name.replace(
".ipynb", ".html"
)
notebook_to_report(filepath, report_path)
# write the source code
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
source_code_path = ln_setup.settings.cache_dir / filepath.name.replace(
".ipynb", ".py"
)
notebook_to_script(transform, filepath, source_code_path)
Expand Down Expand Up @@ -171,7 +171,7 @@ def save_context_core(
transform.hash = hash

# track environment
env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
env_path = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt"
if env_path.exists():
overwrite_env = True
if run.environment_id is not None and from_cli:
Expand Down
8 changes: 2 additions & 6 deletions lamindb/_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,7 @@ def using(
return QuerySet(model=cls, using=None)
owner, name = get_owner_name_from_identifier(instance)
settings_file = instance_settings_file(name, owner)
cache_filepath = (
ln_setup.settings.storage.cache_dir / f"instance--{owner}--{name}--uid.txt"
)
cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
if not settings_file.exists():
result = connect_instance(owner=owner, name=name)
if isinstance(result, str):
Expand Down Expand Up @@ -469,9 +467,7 @@ def get_transfer_run(record) -> Run:

slug = record._state.db
owner, name = get_owner_name_from_identifier(slug)
cache_filepath = (
ln_setup.settings.storage.cache_dir / f"instance--{owner}--{name}--uid.txt"
)
cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
if not cache_filepath.exists():
raise SystemExit("Need to call .using() before")
instance_uid = cache_filepath.read_text()
Expand Down
2 changes: 1 addition & 1 deletion lamindb/_save.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def copy_or_move_to_cache(

local_path = local_path.resolve()
is_dir = local_path.is_dir()
cache_dir = settings._storage_settings.cache_dir
cache_dir = settings.cache_dir

# just delete from the cache dir if storage_path is local
if cache_path is None:
Expand Down
5 changes: 5 additions & 0 deletions lamindb/core/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping])
path, kwargs = path_kwargs, {}
set_managed_storage(path, **kwargs)

@property
def cache_dir(self) -> UPath:
"""Cache root, a local directory to cache cloud files."""
return ln_setup.settings.cache_dir

@property
def storage_local(self) -> StorageSettings:
"""An additional local default storage (a path to its root).
Expand Down
4 changes: 2 additions & 2 deletions lamindb/core/_sync_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class BlobHashNotFound(SystemExit):

def get_git_repo_from_remote() -> Path:
repo_url = settings.sync_git_repo
repo_dir = setup_settings.storage.cache_dir / repo_url.split("/")[-1]
repo_dir = setup_settings.cache_dir / repo_url.split("/")[-1]
if repo_dir.exists():
logger.warning(f"git repo {repo_dir} already exists locally")
return repo_dir
Expand All @@ -26,7 +26,7 @@ def get_git_repo_from_remote() -> Path:
result = subprocess.run(
["git", "clone", "--depth", "10", f"{repo_url}.git"],
capture_output=True,
cwd=setup_settings.storage.cache_dir,
cwd=setup_settings.cache_dir,
)
if result.returncode != 0 or not repo_dir.exists():
raise RuntimeError(result.stderr.decode())
Expand Down
2 changes: 1 addition & 1 deletion lamindb/core/_track_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def track_environment(run: Run) -> None:
filepath = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
filepath = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt"
# create a requirements.txt
# we don't create a conda environment.yml mostly for its slowness
try:
Expand Down
4 changes: 1 addition & 3 deletions lamindb/integrations/_vitessce.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,7 @@ def save_vitessce_config(
raise NotImplementedError
run.input_artifacts.set(dataset_artifacts)
# create a JSON export
config_file_local_path = (
ln_setup.settings.storage.cache_dir / "config.vitessce.json"
)
config_file_local_path = ln_setup.settings.cache_dir / "config.vitessce.json"
with open(config_file_local_path, "w") as file:
json.dump(vc_dict, file)
vitessce_config_artifact = Artifact(
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def install_ci(session, group):
extras += "aws,zarr,bionty,jupyter"
run(
session,
"uv pip install --system --no-deps ./sub/wetlab",
"uv pip install --system --no-deps ./sub/wetlab ./sub/findrefs",
)
run(session, "uv pip install --system vitessce")
elif group == "docs":
Expand Down
2 changes: 1 addition & 1 deletion sub/lamin-cli
12 changes: 6 additions & 6 deletions tests/core/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_local_cache():
artifact = ln.Artifact.from_anndata(adata, key="test_cache.h5ad")
temp_path = artifact._local_filepath.resolve()
assert temp_path.exists()
assert ln.setup.settings.storage.cache_dir in temp_path.parents
assert ln.setup.settings.cache_dir in temp_path.parents

artifact.save()
assert artifact.path.exists()
Expand All @@ -52,7 +52,7 @@ def test_local_cache():
artifact.delete(permanent=True)

# check directories in cache
cache_dir = ln.setup.settings.storage.cache_dir
cache_dir = ln.setup.settings.cache_dir
adata_zarr_pth = cache_dir / "test_adata.zarr"
adata.write_zarr(adata_zarr_pth)

Expand All @@ -71,7 +71,7 @@ def test_cloud_cache(switch_storage):
# check that we have cloud storage
assert ln.setup.settings.storage.root_as_str == switch_storage

cache_dir = ln.setup.settings.storage.cache_dir
cache_dir = ln.setup.settings.cache_dir
assert cache_dir is not None

test_file = ln.core.datasets.anndata_file_pbmc68k_test()
Expand Down Expand Up @@ -142,11 +142,11 @@ def test_cloud_cache(switch_storage):
def test_cloud_cache_versions(switch_storage):
adata = load_h5ad(ln.core.datasets.anndata_file_pbmc68k_test())

cache_dir = ln.setup.settings.storage.cache_dir
cache_dir = ln.setup.settings.cache_dir
assert cache_dir is not None

artifact = ln.Artifact.from_anndata(adata, key="test_cache.h5ad")
assert ln.settings.storage.cache_dir in artifact._local_filepath.parents
assert ln.settings.cache_dir in artifact._local_filepath.parents
artifact.save()
cache_path_v1 = artifact.cache()
assert cache_path_v1.exists()
Expand All @@ -169,7 +169,7 @@ def test_cloud_cache_versions(switch_storage):
artifact_v2 = ln.Artifact.from_anndata(
adata, key="test_cache.h5ad", revises=artifact
)
assert ln.settings.storage.cache_dir in artifact_v2._local_filepath.parents
assert ln.settings.cache_dir in artifact_v2._local_filepath.parents
artifact_v2.save()
assert artifact_v2.is_latest
assert not artifact.is_latest
Expand Down

0 comments on commit 9070cad

Please sign in to comment.