From 1051c074b2c1fb8473e63b5d11608ab4e12d063a Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 8 Jul 2024 10:07:45 +0200 Subject: [PATCH] whitelist datasets (#2000) ## Changes whitelist datasets ### Linked issues Progresses #1901 ### Functionality None ### Tests - [x] manually tested Co-authored-by: Eric Vergnaud --- .../labs/ucx/source_code/known.json | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/src/databricks/labs/ucx/source_code/known.json b/src/databricks/labs/ucx/source_code/known.json index c12ef8866d..acd9ee8810 100644 --- a/src/databricks/labs/ucx/source_code/known.json +++ b/src/databricks/labs/ucx/source_code/known.json @@ -1416,6 +1416,153 @@ "databricks-sdk": { "databricks.sdk": [] }, + "datasets": { + "datasets": [], + "datasets.arrow_dataset": [], + "datasets.arrow_reader": [], + "datasets.arrow_writer": [], + "datasets.builder.bak": [], + "datasets.builder": [], + "datasets.combine": [], + "datasets.commands": [], + "datasets.commands.convert": [], + "datasets.commands.convert_to_parquet": [], + "datasets.commands.datasets_cli": [], + "datasets.commands.delete_from_hub": [], + "datasets.commands.dummy_data": [], + "datasets.commands.env": [], + "datasets.commands.run_beam": [], + "datasets.commands.test": [], + "datasets.config": [], + "datasets.data_files": [], + "datasets.dataset_dict": [], + "datasets.distributed": [], + "datasets.download": [], + "datasets.download.download_config": [], + "datasets.download.download_manager": [], + "datasets.download.mock_download_manager": [], + "datasets.download.streaming_download_manager": [], + "datasets.exceptions": [], + "datasets.features": [], + "datasets.features.audio": [], + "datasets.features.features": [], + "datasets.features.image": [], + "datasets.features.translation": [], + "datasets.filesystems": [], + "datasets.filesystems.compression": [], + "datasets.filesystems.s3filesystem": [], + "datasets.fingerprint": [], + "datasets.formatting": [], 
+ "datasets.formatting.formatting": [], + "datasets.formatting.jax_formatter": [], + "datasets.formatting.np_formatter": [], + "datasets.formatting.polars_formatter": [], + "datasets.formatting.tf_formatter": [], + "datasets.formatting.torch_formatter": [], + "datasets.hub": [], + "datasets.info": [], + "datasets.inspect": [], + "datasets.io": [], + "datasets.io.abc": [], + "datasets.io.csv": [], + "datasets.io.generator": [], + "datasets.io.json": [], + "datasets.io.parquet": [], + "datasets.io.spark": [], + "datasets.io.sql": [], + "datasets.io.text": [], + "datasets.iterable_dataset": [], + "datasets.keyhash": [], + "datasets.load": [], + "datasets.metric": [], + "datasets.naming": [], + "datasets.packaged_modules": [], + "datasets.packaged_modules.arrow": [], + "datasets.packaged_modules.arrow.arrow": [], + "datasets.packaged_modules.audiofolder": [], + "datasets.packaged_modules.audiofolder.audiofolder": [], + "datasets.packaged_modules.cache": [], + "datasets.packaged_modules.cache.cache": [], + "datasets.packaged_modules.csv": [], + "datasets.packaged_modules.csv.csv": [], + "datasets.packaged_modules.folder_based_builder": [], + "datasets.packaged_modules.folder_based_builder.folder_based_builder": [], + "datasets.packaged_modules.generator": [], + "datasets.packaged_modules.generator.generator": [], + "datasets.packaged_modules.imagefolder": [], + "datasets.packaged_modules.imagefolder.imagefolder": [], + "datasets.packaged_modules.json": [], + "datasets.packaged_modules.json.json": [], + "datasets.packaged_modules.pandas": [], + "datasets.packaged_modules.pandas.pandas": [], + "datasets.packaged_modules.parquet": [], + "datasets.packaged_modules.parquet.parquet": [], + "datasets.packaged_modules.spark": [], + "datasets.packaged_modules.spark.spark": [ + { + "code": "legacy-context-in-shared-clusters", + "message": "sparkContext is not supported on UC Shared Clusters. 
Rewrite it using spark" + }, + { + "code": "rdd-in-shared-clusters", + "message": "RDD APIs are not supported on UC Shared Clusters. Rewrite it using DataFrame API" + }, + { + "code": "rdd-in-shared-clusters", + "message": "RDD APIs are not supported on UC Shared Clusters. Use mapInArrow() or Pandas UDFs instead" + } + ], + "datasets.packaged_modules.sql": [], + "datasets.packaged_modules.sql.sql": [], + "datasets.packaged_modules.text": [], + "datasets.packaged_modules.text.text": [], + "datasets.packaged_modules.webdataset": [], + "datasets.packaged_modules.webdataset._tenbin": [], + "datasets.packaged_modules.webdataset.webdataset": [], + "datasets.parallel": [], + "datasets.parallel.parallel": [], + "datasets.search": [], + "datasets.splits": [], + "datasets.streaming": [], + "datasets.table": [], + "datasets.tasks": [], + "datasets.tasks.audio_classification": [], + "datasets.tasks.automatic_speech_recognition": [], + "datasets.tasks.base": [], + "datasets.tasks.image_classification": [], + "datasets.tasks.language_modeling": [], + "datasets.tasks.question_answering": [], + "datasets.tasks.summarization": [], + "datasets.tasks.text_classification": [], + "datasets.utils": [], + "datasets.utils._dataset_viewer": [], + "datasets.utils._dill": [], + "datasets.utils._filelock": [], + "datasets.utils.beam_utils": [], + "datasets.utils.cache": [], + "datasets.utils.deprecation_utils": [], + "datasets.utils.doc_utils": [], + "datasets.utils.download_manager": [], + "datasets.utils.experimental": [], + "datasets.utils.extract": [], + "datasets.utils.file_utils": [], + "datasets.utils.filelock": [], + "datasets.utils.hub": [], + "datasets.utils.info_utils": [], + "datasets.utils.logging": [], + "datasets.utils.metadata": [], + "datasets.utils.patching": [], + "datasets.utils.py_utils": [], + "datasets.utils.readme": [], + "datasets.utils.resources": [], + "datasets.utils.sharding": [], + "datasets.utils.stratify": [], + "datasets.utils.tf_utils": [], + 
"datasets.utils.tqdm": [], + "datasets.utils.track": [], + "datasets.utils.typing": [], + "datasets.utils.version": [] + }, "datasetsforecast": { "action_files": [], "action_files.test_dask": [],