From 88ead4933fdaee7ea159121abe29496b73b45dcc Mon Sep 17 00:00:00 2001 From: Nicholas Junge Date: Wed, 30 Aug 2023 14:04:37 +0200 Subject: [PATCH] Add test for file system initialization methods Since we now have three rivaling modes of initialization for the file system, it is time to test the priority order is correct. This brought up a quirk related to instance caching: Since both the `lakectl` config file and environment variables are read implicitly, they do not require arguments, and thus the first method used between config file and envvar initialization will cause cache hits, and suffocate the other method unless the cache is cleared. --- tests/conftest.py | 19 +++++++++++++++++++ tests/test_fs.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 745f1446..abec6c34 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ from typing import Generator, TypeVar import pytest +import yaml from lakefs_client import Configuration from lakefs_client.client import LakeFSClient from lakefs_client.models import BranchCreation, RepositoryCreation @@ -107,3 +108,21 @@ def temp_branch(lakefs_client: LakeFSClient, repository: str) -> YieldFixture[st @pytest.fixture def random_file_factory(tmp_path: Path) -> RandomFileFactory: return RandomFileFactory(path=tmp_path) + + +@pytest.fixture +def temporary_lakectl_config() -> YieldFixture[str]: + d = { + "credentials": {"access_key_id": "hello", "secret_access_key": "world"}, + "server": {"endpoint_url": "http://hello-world-xyz"}, + } + + loc = "~/.lakectl.yaml" + path = Path(loc).expanduser() + + try: + with open(path, "w") as f: + yaml.dump(d, f) + yield loc + finally: + path.unlink() diff --git a/tests/test_fs.py b/tests/test_fs.py index fa56d879..d570f059 100644 --- a/tests/test_fs.py +++ b/tests/test_fs.py @@ -1,3 +1,5 @@ +from pytest import MonkeyPatch + from lakefs_spec import LakeFSFileSystem @@ -20,3 +22,43 @@ def test_instance_caching(fs: LakeFSFileSystem) -> None: assert fs2._fs_token != fs._fs_token assert len(LakeFSFileSystem._cache) == 2 + + +def test_initialization(monkeypatch: MonkeyPatch, temporary_lakectl_config: str) -> None: + """ + Verify the priority order of initialization in lakeFS file system: + 1. direct arguments + 2. environment variables + 3. `lakectl` config file + + NOTE: The configuration appends "/api/v1" to all hostnames by default. + """ + + # Verify behaviors in reverse priority order. + # Case 1: Instantiation from ~/.lakectl.yaml. + cfg_fs = LakeFSFileSystem() + config = cfg_fs.client._api.configuration + assert config.host == "http://hello-world-xyz/api/v1" + assert config.username == "hello" + assert config.password == "world" + + monkeypatch.setenv("LAKEFS_HOST", "http://localhost:1000") + monkeypatch.setenv("LAKEFS_USERNAME", "my-user") + monkeypatch.setenv("LAKEFS_PASSWORD", "my-password-12345") + + # Case 2: Instantiation from envvars. + # Clear the instance cache first, since we otherwise would get a cache hit + # due to the instantiation being the same as from `.lakectl.yaml` above. + LakeFSFileSystem._cache.clear() + envvar_fs = LakeFSFileSystem() + config = envvar_fs.client._api.configuration + assert config.host == "http://localhost:1000/api/v1" + assert config.username == "my-user" + assert config.password == "my-password-12345" + + # Case 3: Explicit initialization. + fs = LakeFSFileSystem(host="http://lakefs.hello", username="my-user", password="my-password") + config = fs.client._api.configuration + assert config.host == "http://lakefs.hello/api/v1" + assert config.username == "my-user" + assert config.password == "my-password"