diff --git a/src/datachain/catalog/catalog.py b/src/datachain/catalog/catalog.py index 909523628..7e83fa464 100644 --- a/src/datachain/catalog/catalog.py +++ b/src/datachain/catalog/catalog.py @@ -680,8 +680,9 @@ def _row_to_node(d: dict[str, Any]) -> Node: ds_namespace, ds_project, ds_name = parse_dataset_name(ds_name) assert ds_namespace assert ds_project - project = self.metastore.get_project(ds_project, ds_namespace) - dataset = self.get_dataset(ds_name, project) + dataset = self.get_dataset( + ds_name, namespace_name=ds_namespace, project_name=ds_project + ) if not ds_version: ds_version = dataset.latest_version dataset_sources = self.warehouse.get_dataset_sources( @@ -807,7 +808,11 @@ def create_dataset( ) default_version = DEFAULT_DATASET_VERSION try: - dataset = self.get_dataset(name, project) + dataset = self.get_dataset( + name, + namespace_name=project.namespace.name if project else None, + project_name=project.name if project else None, + ) default_version = dataset.next_version_patch if update_version == "major": default_version = dataset.next_version_major @@ -1016,7 +1021,11 @@ def create_dataset_from_sources( dc.save(name) except Exception as e: # noqa: BLE001 try: - ds = self.get_dataset(name, project) + ds = self.get_dataset( + name, + namespace_name=project.namespace.name, + project_name=project.name, + ) self.metastore.update_dataset_status( ds, DatasetStatus.FAILED, @@ -1033,7 +1042,11 @@ def create_dataset_from_sources( except DatasetNotFoundError: raise e from None - ds = self.get_dataset(name, project) + ds = self.get_dataset( + name, + namespace_name=project.namespace.name, + project_name=project.name, + ) self.update_dataset_version_with_warehouse_info( ds, @@ -1041,7 +1054,11 @@ def create_dataset_from_sources( sources="\n".join(sources), ) - return self.get_dataset(name, project) + return self.get_dataset( + name, + namespace_name=project.namespace.name, + project_name=project.name, + ) def get_full_dataset_name( self, @@ -1077,22 +1094,23 @@ def get_full_dataset_name( return namespace_name, project_name, name def get_dataset( - self, name: str, project: Optional[Project] = None + self, + name: str, + namespace_name: Optional[str] = None, + project_name: Optional[str] = None, ) -> DatasetRecord: from datachain.lib.listing import is_listing_dataset - project = project or self.metastore.default_project + namespace_name = namespace_name or self.metastore.default_namespace_name + project_name = project_name or self.metastore.default_project_name if is_listing_dataset(name): - project = self.metastore.listing_project + namespace_name = self.metastore.system_namespace_name + project_name = self.metastore.listing_project_name - try: - return self.metastore.get_dataset(name, project.id if project else None) - except DatasetNotFoundError: - raise DatasetNotFoundError( - f"Dataset {name} not found in namespace {project.namespace.name}" - f" and project {project.name}" - ) from None + return self.metastore.get_dataset( + name, namespace_name=namespace_name, project_name=project_name + ) def get_dataset_with_remote_fallback( self, @@ -1113,8 +1131,11 @@ def get_dataset_with_remote_fallback( if self.metastore.is_local_dataset(namespace_name) or not update: try: - project = self.metastore.get_project(project_name, namespace_name) - ds = self.get_dataset(name, project) + ds = self.get_dataset( + name, + namespace_name=namespace_name, + project_name=project_name, + ) if not version or ds.has_version(version): return ds except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError): @@ -1139,7 +1160,9 @@ def get_dataset_with_remote_fallback( local_ds_version=version, ) return self.get_dataset( - name, self.metastore.get_project(project_name, namespace_name) + name, + namespace_name=namespace_name, + project_name=project_name, ) return self.get_remote_dataset(namespace_name, project_name, name) @@ -1148,7 +1171,11 @@ def get_dataset_with_version_uuid(self, uuid: str) -> DatasetRecord: """Returns dataset that contains version with specific uuid""" for dataset in self.ls_datasets(): if dataset.has_version_with_uuid(uuid): - return self.get_dataset(dataset.name, dataset.project) + return self.get_dataset( + dataset.name, + namespace_name=dataset.project.namespace.name, + project_name=dataset.project.name, + ) raise DatasetNotFoundError(f"Dataset with version uuid {uuid} not found.") def get_remote_dataset( @@ -1171,9 +1198,18 @@ def get_remote_dataset( return DatasetRecord.from_dict(dataset_info) def get_dataset_dependencies( - self, name: str, version: str, project: Optional[Project] = None, indirect=False + self, + name: str, + version: str, + namespace_name: Optional[str] = None, + project_name: Optional[str] = None, + indirect=False, ) -> list[Optional[DatasetDependency]]: - dataset = self.get_dataset(name, project) + dataset = self.get_dataset( + name, + namespace_name=namespace_name, + project_name=project_name, + ) direct_dependencies = self.metastore.get_direct_dataset_dependencies( dataset, version @@ -1187,10 +1223,13 @@ def get_dataset_dependencies( # dependency has been removed continue if d.is_dataset: - project = self.metastore.get_project(d.project, d.namespace) # only datasets can have dependencies d.dependencies = self.get_dataset_dependencies( - d.name, d.version, project, indirect=indirect + d.name, + d.version, + namespace_name=d.namespace, + project_name=d.project, + indirect=indirect, ) return direct_dependencies @@ -1340,7 +1379,11 @@ def export_dataset_table( project: Optional[Project] = None, client_config=None, ) -> list[str]: - dataset = self.get_dataset(name, project) + dataset = self.get_dataset( + name, + namespace_name=project.namespace.name if project else None, + project_name=project.name if project else None, + ) return self.warehouse.export_dataset_table( bucket_uri, dataset, version, client_config @@ -1349,7 +1392,11 @@ def export_dataset_table( def dataset_table_export_file_names( self, name: str, version: str, project: Optional[Project] = None ) -> list[str]: - dataset = self.get_dataset(name, project) + dataset = self.get_dataset( + name, + namespace_name=project.namespace.name if project else None, + project_name=project.name if project else None, + ) return self.warehouse.dataset_table_export_file_names(dataset, version) def remove_dataset( @@ -1359,7 +1406,11 @@ def remove_dataset( version: Optional[str] = None, force: Optional[bool] = False, ): - dataset = self.get_dataset(name, project) + dataset = self.get_dataset( + name, + namespace_name=project.namespace.name if project else None, + project_name=project.name if project else None, + ) if not version and not force: raise ValueError(f"Missing dataset version from input for dataset {name}") if version and not dataset.has_version(version): @@ -1395,7 +1446,11 @@ def edit_dataset( if attrs is not None: update_data["attrs"] = attrs # type: ignore[assignment] - dataset = self.get_dataset(name, project) + dataset = self.get_dataset( + name, + namespace_name=project.namespace.name if project else None, + project_name=project.name if project else None, + ) return self.update_dataset(dataset, **update_data) def ls( @@ -1549,7 +1604,9 @@ def _instantiate(ds_uri: str) -> None: ) try: - local_dataset = self.get_dataset(local_ds_name, project=project) + local_dataset = self.get_dataset( + local_ds_name, namespace_name=namespace.name, project_name=project.name + ) if local_dataset and local_dataset.has_version(local_ds_version): raise DataChainError( f"Local dataset {local_ds_uri} already exists with different uuid," diff --git a/src/datachain/cli/commands/datasets.py b/src/datachain/cli/commands/datasets.py index ada09f199..19ea2dbe7 100644 --- a/src/datachain/cli/commands/datasets.py +++ b/src/datachain/cli/commands/datasets.py @@ -107,8 +107,9 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None): def list_datasets_local_versions(catalog: "Catalog", name: str): namespace_name, project_name, name = catalog.get_full_dataset_name(name) - project = catalog.metastore.get_project(project_name, namespace_name) - ds = catalog.get_dataset(name, project) + ds = catalog.get_dataset( + name, namespace_name=namespace_name, project_name=project_name + ) for v in ds.versions: yield (name, v.version) diff --git a/src/datachain/data_storage/metastore.py b/src/datachain/data_storage/metastore.py index 4310115dd..c388620bc 100644 --- a/src/datachain/data_storage/metastore.py +++ b/src/datachain/data_storage/metastore.py @@ -301,7 +301,13 @@ def list_datasets_by_prefix( """ @abstractmethod - def get_dataset(self, name: str, project_id: Optional[int] = None) -> DatasetRecord: + def get_dataset( + self, + name: str, # normal, not full dataset name + namespace_name: Optional[str] = None, + project_name: Optional[str] = None, + conn=None, + ) -> DatasetRecord: """Gets a single dataset by name.""" @abstractmethod @@ -912,11 +918,14 @@ def create_dataset( **kwargs, # TODO registered = True / False ) -> DatasetRecord: """Creates new dataset.""" - project_id = project_id or self.default_project.id + if not project_id: + project = self.default_project + else: + project = self.get_project_by_id(project_id) query = self._datasets_insert().values( name=name, - project_id=project_id, + project_id=project.id, status=status, feature_schema=json.dumps(feature_schema or {}), created_at=datetime.now(timezone.utc), @@ -935,7 +944,9 @@ def create_dataset( query = query.on_conflict_do_nothing(index_elements=["project_id", "name"]) self.db.execute(query) - return self.get_dataset(name, project_id) + return self.get_dataset( + name, namespace_name=project.namespace.name, project_name=project.name + ) def create_dataset_version( # noqa: PLR0913 self, @@ -992,7 +1003,12 @@ def create_dataset_version( # noqa: PLR0913 ) self.db.execute(query, conn=conn) - return self.get_dataset(dataset.name, dataset.project.id, conn=conn) + return self.get_dataset( + dataset.name, + namespace_name=dataset.project.namespace.name, + project_name=dataset.project.name, + conn=conn, + ) def remove_dataset(self, dataset: DatasetRecord) -> None: """Removes dataset.""" @@ -1216,21 +1232,30 @@ def list_datasets_by_prefix( def get_dataset( self, name: str, # normal, not full dataset name - project_id: Optional[int] = None, + namespace_name: Optional[str] = None, + project_name: Optional[str] = None, conn=None, ) -> DatasetRecord: """ Gets a single dataset in project by dataset name. """ - project_id = project_id or self.default_project.id + namespace_name = namespace_name or self.default_namespace_name + project_name = project_name or self.default_project_name d = self._datasets + n = self._namespaces + p = self._projects query = self._base_dataset_query() - query = query.where(d.c.name == name, d.c.project_id == project_id) # type: ignore [attr-defined] + query = query.where( + d.c.name == name, + n.c.name == namespace_name, + p.c.name == project_name, + ) # type: ignore [attr-defined] ds = self._parse_dataset(self.db.execute(query, conn=conn)) if not ds: raise DatasetNotFoundError( - f"Dataset {name} not found in project with id {project_id}" + f"Dataset {name} not found in namespace {namespace_name}" + f" and project {project_name}" ) return ds diff --git a/src/datachain/delta.py b/src/datachain/delta.py index 680ce2d7d..1c3792abe 100644 --- a/src/datachain/delta.py +++ b/src/datachain/delta.py @@ -77,7 +77,8 @@ def _get_delta_chain( def _get_retry_chain( name: str, - project: Project, + namespace_name: str, + project_name: str, latest_version: str, source_ds_name: str, source_ds_project: Project, @@ -96,8 +97,8 @@ def _get_retry_chain( # Read the latest version of the result dataset for retry logic result_dataset = datachain.read_dataset( name, - namespace=project.namespace.name, - project=project.name, + namespace=namespace_name, + project=project_name, version=latest_version, ) source_dc = datachain.read_dataset( @@ -128,7 +129,8 @@ def _get_retry_chain( def _get_source_info( name: str, - project: Project, + namespace_name: str, + project_name: str, latest_version: str, catalog, ) -> tuple[ @@ -145,7 +147,11 @@ def _get_source_info( Returns (None, None, None, None) if source dataset was removed. """ dependencies = catalog.get_dataset_dependencies( - name, latest_version, project=project, indirect=False + name, + latest_version, + namespace_name=namespace_name, + project_name=project_name, + indirect=False, ) dep = dependencies[0] @@ -157,7 +163,9 @@ def _get_source_info( source_ds_name = dep.name source_ds_version = dep.version source_ds_latest_version = catalog.get_dataset( - source_ds_name, project=source_ds_project + source_ds_name, + namespace_name=source_ds_project.namespace.name, + project_name=source_ds_project.name, ).latest_version return ( @@ -211,12 +219,14 @@ def delta_retry_update( """ catalog = dc.session.catalog - project = catalog.metastore.get_project(project_name, namespace_name) + # project = catalog.metastore.get_project(project_name, namespace_name) dc._query.apply_listing_pre_step() # Check if dataset exists try: - dataset = catalog.get_dataset(name, project=project) + dataset = catalog.get_dataset( + name, namespace_name=namespace_name, project_name=project_name + ) latest_version = dataset.latest_version except DatasetNotFoundError: # First creation of result dataset @@ -234,7 +244,7 @@ def delta_retry_update( source_ds_version, source_ds_latest_version, dependencies, - ) = _get_source_info(name, project, latest_version, catalog) + ) = _get_source_info(name, namespace_name, project_name, latest_version, catalog) # If source_ds_name is None, starting dataset was removed if source_ds_name is None: @@ -264,7 +274,8 @@ def delta_retry_update( if delta_retry: retry_chain = _get_retry_chain( name, - project, + namespace_name, + project_name, latest_version, source_ds_name, source_ds_project, @@ -290,8 +301,8 @@ def delta_retry_update( latest_dataset = datachain.read_dataset( name, - namespace=project.namespace.name, - project=project.name, + namespace=namespace_name, + project=project_name, version=latest_version, ) compared_chain = latest_dataset.diff( diff --git a/src/datachain/lib/dc/datachain.py b/src/datachain/lib/dc/datachain.py index d92f4340d..75d2af185 100644 --- a/src/datachain/lib/dc/datachain.py +++ b/src/datachain/lib/dc/datachain.py @@ -284,7 +284,11 @@ def dataset(self) -> Optional[DatasetRecord]: """Underlying dataset, if there is one.""" if not self.name: return None - return self.session.catalog.get_dataset(self.name, self._query.project) + return self.session.catalog.get_dataset( + self.name, + namespace_name=self._query.project.namespace.name, + project_name=self._query.project.name, + ) def __or__(self, other: "Self") -> "Self": """Return `self.union(other)`.""" diff --git a/src/datachain/lib/dc/datasets.py b/src/datachain/lib/dc/datasets.py index d879f2314..aa04f5e25 100644 --- a/src/datachain/lib/dc/datasets.py +++ b/src/datachain/lib/dc/datasets.py @@ -357,7 +357,14 @@ def delete_dataset( ) from None if not force: - version = version or catalog.get_dataset(name, ds_project).latest_version + version = ( + version + or catalog.get_dataset( + name, + namespace_name=ds_project.namespace.name, + project_name=ds_project.name, + ).latest_version + ) else: version = None catalog.remove_dataset(name, ds_project, version=version, force=force) @@ -403,9 +410,7 @@ def move_dataset( namespace, project, name = catalog.get_full_dataset_name(src) dest_namespace, dest_project, dest_name = catalog.get_full_dataset_name(dest) - dataset = catalog.get_dataset( - name, catalog.metastore.get_project(project, namespace) - ) + dataset = catalog.get_dataset(name, namespace_name=namespace, project_name=project) catalog.update_dataset( dataset, diff --git a/src/datachain/listing.py b/src/datachain/listing.py index 20a966ab2..30d8a8479 100644 --- a/src/datachain/listing.py +++ b/src/datachain/listing.py @@ -65,17 +65,13 @@ def uri(self): @cached_property def dataset(self) -> "DatasetRecord": - from datachain.error import DatasetNotFoundError - assert self.dataset_name project = self.metastore.listing_project - try: - return self.metastore.get_dataset(self.dataset_name, project.id) - except DatasetNotFoundError: - raise DatasetNotFoundError( - f"Dataset {self.dataset_name} not found in namespace" - f" {project.namespace.name} and project {project.name}" - ) from None + return self.metastore.get_dataset( + self.dataset_name, + namespace_name=project.namespace.name, + project_name=project.name, + ) @cached_property def dataset_rows(self): diff --git a/src/datachain/query/dataset.py b/src/datachain/query/dataset.py index 8f183e1af..e75288eb0 100644 --- a/src/datachain/query/dataset.py +++ b/src/datachain/query/dataset.py @@ -1703,16 +1703,18 @@ def _add_dependencies(self, dataset: "DatasetRecord", version: str): for dep in self.catalog.get_dataset_dependencies( dep_dataset.name, dep_dataset_version, - dep_dataset.project, + namespace_name=dep_dataset.project.namespace.name, + project_name=dep_dataset.project.name, indirect=False, ): if dep: - dep_project = self.catalog.metastore.get_project( - dep.project, dep.namespace - ) dependencies.add( ( - self.catalog.get_dataset(dep.name, dep_project), + self.catalog.get_dataset( + dep.name, + namespace_name=dep.namespace, + project_name=dep.project, + ), dep.version, ) ) @@ -1754,7 +1756,11 @@ def save( if ( name and version - and self.catalog.get_dataset(name, project).has_version(version) + and self.catalog.get_dataset( + name, + namespace_name=project.namespace.name, + project_name=project.name, + ).has_version(version) ): raise RuntimeError(f"Dataset {name} already has version {version}") except DatasetNotFoundError: @@ -1808,11 +1814,15 @@ def save( # overriding dependencies self.dependencies = set() for dep in dependencies: - dep_project = self.catalog.metastore.get_project( - dep.project, dep.namespace - ) self.dependencies.add( - (self.catalog.get_dataset(dep.name, dep_project), dep.version) + ( + self.catalog.get_dataset( + dep.name, + namespace_name=dep.namespace, + project_name=dep.project, + ), + dep.version, + ) ) self._add_dependencies(dataset, version) # type: ignore [arg-type] diff --git a/tests/func/test_datasets.py b/tests/func/test_datasets.py index 211f66c17..7d117ea5a 100644 --- a/tests/func/test_datasets.py +++ b/tests/func/test_datasets.py @@ -11,7 +11,6 @@ from datachain.error import ( DatasetInvalidVersionError, DatasetNotFoundError, - ProjectNotFoundError, ) from datachain.lib.file import File from datachain.lib.listing import parse_listing_uri @@ -163,11 +162,11 @@ def test_create_dataset_already_exist_wrong_version( def test_get_dataset(cloud_test_catalog, dogs_dataset): catalog = cloud_test_catalog.catalog - dataset = catalog.get_dataset(dogs_dataset.name, dogs_dataset.project) + dataset = catalog.get_dataset(dogs_dataset.name) assert dataset.name == dogs_dataset.name with pytest.raises(DatasetNotFoundError): - catalog.get_dataset("wrong name", dogs_dataset.project) + catalog.get_dataset("wrong name") def test_create_dataset_from_sources(listed_bucket, cloud_test_catalog, project): @@ -317,7 +316,7 @@ def test_remove_dataset(cloud_test_catalog, dogs_dataset): catalog.remove_dataset(dogs_dataset.name, dogs_dataset.project, force=True) with pytest.raises(DatasetNotFoundError): - catalog.get_dataset(dogs_dataset.name, dogs_dataset.project) + catalog.get_dataset(dogs_dataset.name) dataset_table_name = catalog.warehouse.dataset_table_name(dogs_dataset, "1.0.0") assert table_row_count(catalog.warehouse.db, dataset_table_name) is None @@ -339,7 +338,7 @@ def test_remove_dataset_with_multiple_versions(cloud_test_catalog, dogs_dataset) catalog.remove_dataset(updated_dogs_dataset.name, dogs_dataset.project, force=True) with pytest.raises(DatasetNotFoundError): - catalog.get_dataset(updated_dogs_dataset.name, dogs_dataset.project) + catalog.get_dataset(updated_dogs_dataset.name) assert ( catalog.metastore.get_direct_dataset_dependencies(updated_dogs_dataset, "1.0.0") @@ -375,7 +374,7 @@ def test_edit_dataset(cloud_test_catalog, dogs_dataset): attrs=["cats", "birds"], ) - dataset = catalog.get_dataset(dataset_new_name, dogs_dataset.project) + dataset = catalog.get_dataset(dataset_new_name) assert dataset.name == dataset_new_name assert dataset.description == "new description" assert dataset.attrs == ["cats", "birds"] @@ -462,7 +461,7 @@ def test_move_dataset_then_save_into(test_session): def test_move_dataset_wrong_old_project(test_session, project): dc.read_values(num=[1, 2, 3], session=test_session).save("old.old.numbers") - with pytest.raises(ProjectNotFoundError): + with pytest.raises(DatasetNotFoundError): dc.move_dataset("wrong.wrong.numbers", "new.new.numbers", session=test_session) @@ -494,7 +493,7 @@ def test_edit_dataset_same_name(cloud_test_catalog, dogs_dataset): dogs_dataset.name, dogs_dataset.project, new_name=dataset_new_name ) - dataset = catalog.get_dataset(dataset_new_name, dogs_dataset.project) + dataset = catalog.get_dataset(dataset_new_name) assert dataset.name == dataset_new_name # check if dataset tables are renamed correctly @@ -523,7 +522,7 @@ def test_edit_dataset_remove_attrs_and_description(cloud_test_catalog, dogs_data attrs=[], ) - dataset = catalog.get_dataset(dataset_new_name, dogs_dataset.project) + dataset = catalog.get_dataset(dataset_new_name) assert [v.version for v in dataset.versions] == ["1.0.0"] assert dataset.name == dataset_new_name assert dataset.description == "" @@ -730,9 +729,7 @@ def test_dataset_preview_last_modified(cloud_test_catalog, dogs_dataset): catalog=catalog, ).save("dogs_custom_columns", project=project) - for r in ( - catalog.get_dataset("dogs_custom_columns", project).get_version("1.0.0").preview - ): + for r in catalog.get_dataset("dogs_custom_columns").get_version("1.0.0").preview: assert isinstance(r.get("file__last_modified"), str) @@ -766,9 +763,7 @@ def test_row_random(cloud_test_catalog): def test_dataset_stats_registered_ds(cloud_test_catalog, dogs_dataset): catalog = cloud_test_catalog.catalog - dataset = catalog.get_dataset(dogs_dataset.name, dogs_dataset.project).get_version( - "1.0.0" - ) + dataset = catalog.get_dataset(dogs_dataset.name).get_version("1.0.0") assert dataset.num_objects == 4 assert dataset.size == 15 rows_count = catalog.warehouse.dataset_rows_count(dogs_dataset, "1.0.0") @@ -787,9 +782,7 @@ def test_dataset_storage_dependencies(cloud_test_catalog, cloud_type, indirect): dc.read_storage(uri, session=ctc.session).save(ds_name) lst_ds_name, _, _ = parse_listing_uri(uri) - lst_dataset = catalog.metastore.get_dataset( - lst_ds_name, catalog.metastore.listing_project.id - ) + lst_dataset = catalog.get_dataset(lst_ds_name) assert [ dataset_dependency_asdict(d) diff --git a/tests/func/test_pull.py b/tests/func/test_pull.py index d0b0e9720..24ef86fc3 100644 --- a/tests/func/test_pull.py +++ b/tests/func/test_pull.py @@ -168,7 +168,11 @@ def test_pull_dataset_success( ) project = catalog.metastore.get_project(REMOTE_PROJECT_NAME, REMOTE_NAMESPACE_NAME) - dataset = catalog.get_dataset(local_ds_name or "dogs", project=project) + dataset = catalog.get_dataset( + local_ds_name or "dogs", + namespace_name=project.namespace.name, + project_name=project.name, + ) assert dataset.project.namespace.uuid == REMOTE_NAMESPACE_UUID assert dataset.project.uuid == REMOTE_PROJECT_UUID @@ -248,7 +252,9 @@ def test_datachain_read_dataset_pull( # Check that dataset is available locally after pulling project = catalog.metastore.get_project(REMOTE_PROJECT_NAME, REMOTE_NAMESPACE_NAME) - dataset = catalog.get_dataset("dogs", project) + dataset = catalog.get_dataset( + "dogs", namespace_name=project.namespace.name, project_name=project.name + ) assert dataset.name == "dogs" @@ -373,7 +379,9 @@ def test_pull_dataset_already_exists_locally( ) project = catalog.metastore.get_project(REMOTE_PROJECT_NAME, REMOTE_NAMESPACE_NAME) - other = catalog.get_dataset("other", project) + other = catalog.get_dataset( + "other", namespace_name=project.namespace.name, project_name=project.name + ) other_version = other.get_version("1.0.0") assert other_version.uuid == REMOTE_DATASET_UUID assert other_version.num_objects == 4 @@ -381,7 +389,9 @@ def test_pull_dataset_already_exists_locally( # dataset with same uuid created only once, on first pull with local name "other" with pytest.raises(DatasetNotFoundError): - catalog.get_dataset("dogs", project) + catalog.get_dataset( + "dogs", namespace_name=project.namespace.name, project_name=project.name + ) @pytest.mark.parametrize("cloud_type, version_aware", [("s3", False)], indirect=True)