From d271df45654a239bacf482338ea8febe5c029886 Mon Sep 17 00:00:00 2001 From: Nicholas Junge Date: Wed, 17 Jul 2024 15:37:06 +0200 Subject: [PATCH] Add rm test on more than 1000 files (#286) * Update pre-commit and requirements files * Add rm test on more than 1k files This is to assert that the file system does not trigger the lakeFS server's 1k file per deletion request limit, and instead batches the files. --- .pre-commit-config.yaml | 11 ++++++----- requirements-dev.txt | 16 ++++++++-------- requirements-docs.txt | 26 +++++++++++++------------- tests/test_rm.py | 25 +++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 88b46d9d..aa98b1da 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,30 +12,31 @@ repos: - id: end-of-file-fixer - id: mixed-line-ending - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.10.1 hooks: # See https://github.com/pre-commit/mirrors-mypy/blob/main/.pre-commit-hooks.yaml - id: mypy types_or: [python, pyi] args: [--ignore-missing-imports, --scripts-are-modules] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.3 + rev: v0.5.2 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - id: ruff-format - repo: https://github.com/PyCQA/bandit - rev: 1.7.8 + rev: 1.7.9 hooks: - id: bandit args: [-c, pyproject.toml] additional_dependencies: ["bandit[toml]"] - repo: https://github.com/jsh9/pydoclint - rev: 0.4.1 + rev: 0.5.6 hooks: - id: pydoclint + args: [--check-class-attributes=False] - repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.1.39 + rev: 0.2.25 hooks: - id: pip-compile name: pip-compile requirements-dev.txt diff --git a/requirements-dev.txt b/requirements-dev.txt index b799e7fa..de7c9ae5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,28 +5,28 @@ annotated-types==0.7.0 build==1.2.1 cfgv==3.4.0 click==8.1.7 -coverage[toml]==7.5.4 +coverage[toml]==7.6.0 
distlib==0.3.8 docstring-parser-fork==0.0.9 duckdb==0.10.0 filelock==3.15.4 fsspec==2024.6.1 -identify==2.5.36 +identify==2.6.0 iniconfig==2.0.0 lakefs==0.7.0 -lakefs-sdk==1.28.0 +lakefs-sdk==1.29.0 nodeenv==1.9.1 numpy==2.0.0 packaging==24.1 pandas[parquet]==2.2.2 platformdirs==4.2.2 pluggy==1.5.0 -polars==0.20.31 +polars==1.2.0 pre-commit==3.7.1 -pyarrow==16.1.0 -pydantic==2.7.4 -pydantic-core==2.18.4 -pydoclint==0.5.3 +pyarrow==17.0.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pydoclint==0.5.5 pyproject-hooks==1.1.0 pytest==8.2.2 pytest-cov==5.0.0 diff --git a/requirements-docs.txt b/requirements-docs.txt index eaaaff9c..0318824b 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -14,7 +14,7 @@ babel==2.15.0 beautifulsoup4==4.12.3 black==24.4.2 bleach==6.1.0 -certifi==2024.6.2 +certifi==2024.7.4 cffi==1.16.0 charset-normalizer==3.3.2 click==8.1.7 @@ -33,14 +33,14 @@ fsspec==2024.6.1 ghp-import==2.1.0 gitdb==4.0.11 gitpython==3.1.43 -griffe==0.47.0 +griffe==0.48.0 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 idna==3.7 importlib-metadata==8.0.0 importlib-resources==6.4.0 -ipykernel==6.29.4 +ipykernel==6.29.5 ipython==8.26.0 ipywidgets==8.1.3 isoduration==20.11.0 @@ -48,7 +48,7 @@ jedi==0.19.1 jinja2==3.1.4 json5==0.9.25 jsonpointer==3.0.0 -jsonschema[format-nongpl]==4.22.0 +jsonschema[format-nongpl]==4.23.0 jsonschema-specifications==2023.12.1 jupyter==1.0.0 jupyter-client==8.6.2 @@ -56,15 +56,15 @@ jupyter-console==6.6.3 jupyter-core==5.7.2 jupyter-events==0.10.0 jupyter-lsp==2.2.5 -jupyter-server==2.14.1 +jupyter-server==2.14.2 jupyter-server-terminals==0.5.3 jupyterlab==4.2.3 jupyterlab-pygments==0.3.0 -jupyterlab-server==2.27.2 +jupyterlab-server==2.27.3 jupyterlab-widgets==3.0.11 -jupytext==1.16.2 +jupytext==1.16.3 lakefs==0.7.0 -lakefs-sdk==1.28.0 +lakefs-sdk==1.29.0 markdown==3.6 markdown-it-py==3.0.0 markupsafe==2.1.5 @@ -82,7 +82,7 @@ mkdocs-get-deps==0.2.0 mkdocs-git-revision-date-localized-plugin==1.2.6 mkdocs-include-dir-to-nav==1.2.0 
mkdocs-literate-nav==0.6.1 -mkdocs-material==9.5.27 +mkdocs-material==9.5.29 mkdocs-material-extensions==1.3.1 mkdocs-section-index==0.3.9 mkdocstrings[python]==0.25.1 @@ -110,8 +110,8 @@ psutil==6.0.0 ptyprocess==0.7.0 pure-eval==0.2.2 pycparser==2.22 -pydantic==2.7.4 -pydantic-core==2.18.4 +pydantic==2.8.2 +pydantic-core==2.20.1 pygments==2.18.0 pymdown-extensions==10.8.1 pyparsing==3.1.2 @@ -129,9 +129,9 @@ requests==2.32.3 rfc3339-validator==0.1.4 rfc3986-validator==0.1.1 rich==13.7.1 -rpds-py==0.18.1 +rpds-py==0.19.0 send2trash==1.8.3 -setuptools==70.1.1 +setuptools==70.3.0 six==1.16.0 smmap==5.0.1 sniffio==1.3.1 diff --git a/tests/test_rm.py b/tests/test_rm.py index 84755425..7b5bf8ad 100644 --- a/tests/test_rm.py +++ b/tests/test_rm.py @@ -2,6 +2,7 @@ from lakefs.repository import Repository from lakefs_spec import LakeFSFileSystem +from tests.util import RandomFileFactory def test_rm( @@ -66,3 +67,27 @@ def test_rm_recursive_with_maxdepth( fs.rm(f"{prefix}/dir1", recursive=True, maxdepth=1) # maxdepth is 1-indexed, level 1 being the directory to be removed. assert fs.exists(f"{prefix}/dir1/dir2/c.txt") + + +def test_rm_with_1k_objects_or_more( + fs: LakeFSFileSystem, + repository: Repository, + temp_branch: Branch, + random_file_factory: RandomFileFactory, +) -> None: + """ + Confirm that lakeFS does not error when attempting to delete more than 1k objects. + """ + testdir = f"{repository.id}/{temp_branch.id}/subfolder" + + # create and put 1002 objects into the above lakeFS directory. + for i in range(1002): + f = random_file_factory.make() + lpath = str(f) + rpath = testdir + f"/test_{i}.txt" + fs.put_file(lpath, rpath) + + assert len(fs.ls(testdir, detail=False)) > 1000 + + # should not error, because we chunk the file deletion requests to size 1000. + fs.rm(testdir, recursive=True, maxdepth=1)