From b545ce2e3635bdfb4e36920c3d835f1ed229872f Mon Sep 17 00:00:00 2001
From: yanghao14 <yanghao14@kuaishou.com>
Date: Sun, 28 Jul 2024 16:47:37 +0800
Subject: [PATCH 1/4] style: enforce python code style (#5)

- Add Ruff and MyPy configurations to pyproject.toml
- Integrate Python linting in GitHub Actions
- Format code with Ruff and Ruff Format
---
 .github/workflows/compliance.yml |  19 ++++-
 python/Makefile                  |   8 ++
 python/hudi/_internal.pyi        |  23 ++----
 python/pyproject.toml            |  24 ++++++
 python/tests/conftest.py         |  10 +--
 python/tests/test_table_read.py  | 134 +++++++++++++++++++++++--------
 6 files changed, 161 insertions(+), 57 deletions(-)

diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml
index 6f0e4dbe..dcc49737 100644
--- a/.github/workflows/compliance.yml
+++ b/.github/workflows/compliance.yml
@@ -47,5 +47,22 @@ jobs:
       - uses: actions/checkout@v4
       - name: Check license header
         uses: apache/skywalking-eyes/header@v0.6.0
-      - name: Check code style
+      - name: Check rust code style
         run: cd python && make check-rust
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+          cache-dependency-path: pyproject.toml
+      - name: Install python linter dependencies
+        working-directory: ./python
+        run: |
+          make setup-env
+          source venv/bin/activate
+          pip install ruff mypy
+      - name: Run python linter
+        working-directory: ./python
+        run: |
+          source venv/bin/activate
+          make check-python
diff --git a/python/Makefile b/python/Makefile
index 4badaca8..be0131a7 100644
--- a/python/Makefile
+++ b/python/Makefile
@@ -48,6 +48,14 @@ check-rust: ## Run check on Rust
 	$(info --- Check Rust format ---)
 	cargo fmt --all -- --check
 
+.PHONY: check-python
+check-python: ## Run check on Python
+	$(info --- Check Python code quality ---)
+	pip install ruff mypy
+	ruff check .
+	ruff format .
+	mypy .
+
 .PHONY: test-rust
 test-rust: ## Run tests on Rust
 	$(info --- Run Rust tests ---)
diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi
index fd97cc31..8f542b34 100644
--- a/python/hudi/_internal.pyi
+++ b/python/hudi/_internal.pyi
@@ -21,7 +21,6 @@ import pyarrow
 
 __version__: str
 
-
 @dataclass(init=False)
 class HudiFileSlice:
     file_group_id: str
@@ -33,24 +32,16 @@ class HudiFileSlice:
 
     def base_file_relative_path(self) -> str: ...
 
-
 @dataclass(init=False)
 class HudiTable:
-
     def __init__(
-            self,
-            table_uri: str,
-            options: Optional[Dict[str, str]] = None,
+        self,
+        table_uri: str,
+        options: Optional[Dict[str, str]] = None,
     ): ...
-
-    def get_schema(self) -> "pyarrow.Schema": ...
-
+    def get_schema(self) -> 'pyarrow.Schema': ...
     def split_file_slices(self, n: int) -> List[List[HudiFileSlice]]: ...
-
     def get_file_slices(self) -> List[HudiFileSlice]: ...
-
-    def read_file_slice(self, base_file_relative_path) -> pyarrow.RecordBatch: ...
-
-    def read_snapshot(self) -> List["pyarrow.RecordBatch"]: ...
-
-    def read_snapshot_as_of(self, timestamp: str) -> List["pyarrow.RecordBatch"]: ...
+    def read_file_slice(self, base_file_relative_path: str) -> pyarrow.RecordBatch: ...
+    def read_snapshot(self) -> List['pyarrow.RecordBatch']: ...
+    def read_snapshot_as_of(self, timestamp: str) -> List['pyarrow.RecordBatch']: ...
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 36f350eb..b5ee5636 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -49,9 +49,33 @@ dynamic = ["version"]
 [tool.maturin]
 module-name = "hudi._internal"
 
+[tool.ruff]
+target-version = 'py38'
+lint.mccabe = { max-complexity = 14 }
+lint.flake8-quotes = {inline-quotes = 'single', multiline-quotes = 'double'}
+lint.pydocstyle = { convention = 'google' }
+format.quote-style = 'single'
+lint.ignore = [
+    "Q000",
+    "Q001",
+    "Q002",
+    "Q003",
+    "COM812",
+    "COM819",
+    "D104",
+    "I001",
+    "UP006",
+    "UP007",
+    "UP037",
+    "E501", # Formatted code may exceed the line length, leading to line-too-long (E501) errors.
+]
+
 [tool.mypy]
 files = "hudi/*.py"
 exclude = "^tests"
+warn_unused_configs = true
+ignore_missing_imports = true
+strict = true
 
 [tool.pytest.ini_options]
 testpaths = [
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index b1fd5668..416bd90d 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -23,18 +23,18 @@
 
 
 def _extract_testing_table(zip_file_path, target_path) -> str:
-    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
+    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
         zip_ref.extractall(target_path)
-    return os.path.join(target_path, "trips_table")
+    return os.path.join(target_path, 'trips_table')
 
 
 @pytest.fixture(
     params=[
-        "0.x_cow_partitioned",
+        '0.x_cow_partitioned',
     ]
 )
 def get_sample_table(request, tmp_path) -> str:
-    fixture_path = "tests/table"
+    fixture_path = 'tests/table'
     table_name = request.param
-    zip_file_path = Path(fixture_path).joinpath(f"{table_name}.zip")
+    zip_file_path = Path(fixture_path).joinpath(f'{table_name}.zip')
     return _extract_testing_table(zip_file_path, tmp_path)
diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py
index e56463c5..2c100d1f 100644
--- a/python/tests/test_table_read.py
+++ b/python/tests/test_table_read.py
@@ -20,28 +20,49 @@
 
 from hudi import HudiTable
 
-PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0)
-pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="hudi only supported if pyarrow >= 8.0.0")
+PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split('.') if s.isnumeric()) < (
+    8,
+    0,
+    0,
+)
+pytestmark = pytest.mark.skipif(
+    PYARROW_LE_8_0_0, reason='hudi only supported if pyarrow >= 8.0.0'
+)
 
 
 def test_sample_table(get_sample_table):
     table_path = get_sample_table
     table = HudiTable(table_path)
 
-    assert table.get_schema().names == ['_hoodie_commit_time', '_hoodie_commit_seqno', '_hoodie_record_key',
-                                        '_hoodie_partition_path', '_hoodie_file_name', 'ts', 'uuid', 'rider', 'driver',
-                                        'fare', 'city']
+    assert table.get_schema().names == [
+        '_hoodie_commit_time',
+        '_hoodie_commit_seqno',
+        '_hoodie_record_key',
+        '_hoodie_partition_path',
+        '_hoodie_file_name',
+        'ts',
+        'uuid',
+        'rider',
+        'driver',
+        'fare',
+        'city',
+    ]
 
     file_slices = table.get_file_slices()
     assert len(file_slices) == 5
-    assert set(f.commit_time for f in file_slices) == {'20240402123035233', '20240402144910683'}
+    assert set(f.commit_time for f in file_slices) == {
+        '20240402123035233',
+        '20240402144910683',
+    }
     assert all(f.num_records == 1 for f in file_slices)
     file_slice_paths = [f.base_file_relative_path() for f in file_slices]
-    assert set(file_slice_paths) == {'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet',
-                                     'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet',
-                                     'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet',
-                                     'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet',
-                                     'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet'}
+    assert set(file_slice_paths) == {
+        'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet',
+        'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet',
+        'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet',
+        'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet',
+        'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet',
+    }
 
     batch = table.read_file_slice(file_slice_paths[0])
     t = pa.Table.from_batches([batch])
@@ -53,29 +74,72 @@ def test_sample_table(get_sample_table):
     assert len(next(file_slices_gen)) == 2
 
     batches = table.read_snapshot()
-    t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts")
-    assert t.to_pylist() == [{'_hoodie_commit_time': '20240402144910683', 'ts': 1695046462179,
-                              'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'fare': 339.0},
-                             {'_hoodie_commit_time': '20240402123035233', 'ts': 1695091554788,
-                              'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', 'fare': 27.7},
-                             {'_hoodie_commit_time': '20240402123035233', 'ts': 1695115999911,
-                              'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'fare': 17.85},
-                             {'_hoodie_commit_time': '20240402123035233', 'ts': 1695159649087,
-                              'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', 'fare': 19.1},
-                             {'_hoodie_commit_time': '20240402123035233', 'ts': 1695516137016,
-                              'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'fare': 34.15}]
+    t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts')
+    assert t.to_pylist() == [
+        {
+            '_hoodie_commit_time': '20240402144910683',
+            'ts': 1695046462179,
+            'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00',
+            'fare': 339.0,
+        },
+        {
+            '_hoodie_commit_time': '20240402123035233',
+            'ts': 1695091554788,
+            'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721',
+            'fare': 27.7,
+        },
+        {
+            '_hoodie_commit_time': '20240402123035233',
+            'ts': 1695115999911,
+            'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa',
+            'fare': 17.85,
+        },
+        {
+            '_hoodie_commit_time': '20240402123035233',
+            'ts': 1695159649087,
+            'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330',
+            'fare': 19.1,
+        },
+        {
+            '_hoodie_commit_time': '20240402123035233',
+            'ts': 1695516137016,
+            'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c',
+            'fare': 34.15,
+        },
+    ]
 
-    table = HudiTable(table_path, {
-        "hoodie.read.as.of.timestamp": "20240402123035233"})
+    table = HudiTable(table_path, {'hoodie.read.as.of.timestamp': '20240402123035233'})
     batches = table.read_snapshot()
-    t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts")
-    assert t.to_pylist() == [{'_hoodie_commit_time': '20240402123035233', 'ts': 1695046462179,
-                              'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'fare': 33.9},
-                             {'_hoodie_commit_time': '20240402123035233', 'ts': 1695091554788,
-                              'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', 'fare': 27.7},
-                             {'_hoodie_commit_time': '20240402123035233', 'ts': 1695115999911,
-                              'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'fare': 17.85},
-                             {'_hoodie_commit_time': '20240402123035233', 'ts': 1695159649087,
-                              'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', 'fare': 19.1},
-                             {'_hoodie_commit_time': '20240402123035233', 'ts': 1695516137016,
-                              'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'fare': 34.15}]
+    t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts')
+    assert t.to_pylist() == [
+        {
+            '_hoodie_commit_time': '20240402123035233',
+            'ts': 1695046462179,
+            'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00',
+            'fare': 33.9,
+        },
+        {
+            '_hoodie_commit_time': '20240402123035233',
+            'ts': 1695091554788,
+            'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721',
+            'fare': 27.7,
+        },
+        {
+            '_hoodie_commit_time': '20240402123035233',
+            'ts': 1695115999911,
+            'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa',
+            'fare': 17.85,
+        },
+        {
+            '_hoodie_commit_time': '20240402123035233',
+            'ts': 1695159649087,
+            'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330',
+            'fare': 19.1,
+        },
+        {
+            '_hoodie_commit_time': '20240402123035233',
+            'ts': 1695516137016,
+            'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c',
+            'fare': 34.15,
+        },
+    ]

From d0e8a096d1948fadb91e2eef8f924464d70c72f9 Mon Sep 17 00:00:00 2001
From: Shiyan Xu <2701446+xushiyan@users.noreply.github.com>
Date: Tue, 6 Aug 2024 19:58:11 -0500
Subject: [PATCH 2/4] update pyproject.toml for python style

---
 python/pyproject.toml | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index b5ee5636..874a2e06 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -51,30 +51,22 @@ module-name = "hudi._internal"
 
 [tool.ruff]
 target-version = 'py38'
-lint.mccabe = { max-complexity = 14 }
-lint.flake8-quotes = {inline-quotes = 'single', multiline-quotes = 'double'}
-lint.pydocstyle = { convention = 'google' }
-format.quote-style = 'single'
-lint.ignore = [
-    "Q000",
-    "Q001",
-    "Q002",
-    "Q003",
-    "COM812",
-    "COM819",
-    "D104",
-    "I001",
-    "UP006",
-    "UP007",
-    "UP037",
-    "E501", # Formatted code may exceed the line length, leading to line-too-long (E501) errors.
+# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
+lint.select = [
+    "E4",
+    "E7",
+    "E9",
+    "F",
+    # isort
+    "I",
 ]
+# don't ignore any rule unless it becomes imperative
+lint.ignore = []
+lint.isort.known-first-party = ["hudi"]
 
 [tool.mypy]
 files = "hudi/*.py"
 exclude = "^tests"
-warn_unused_configs = true
-ignore_missing_imports = true
 strict = true
 
 [tool.pytest.ini_options]

From 57c70ad6ff576fe9e34fd7ed3d1b5718f4080ef3 Mon Sep 17 00:00:00 2001
From: Shiyan Xu <2701446+xushiyan@users.noreply.github.com>
Date: Tue, 6 Aug 2024 20:39:10 -0500
Subject: [PATCH 3/4] fix lint configs: pin ruff/mypy and reformat with ruff defaults

---
 .github/workflows/compliance.yml |  14 ++--
 python/Makefile                  |   7 +-
 python/hudi/__init__.py          |   2 +-
 python/hudi/_internal.pyi        |  10 +--
 python/pyproject.toml            |   2 +
 python/tests/conftest.py         |  10 +--
 python/tests/test_table_read.py  | 126 +++++++++++++++----------------
 7 files changed, 85 insertions(+), 86 deletions(-)

diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml
index dcc49737..9ca8b11c 100644
--- a/.github/workflows/compliance.yml
+++ b/.github/workflows/compliance.yml
@@ -49,20 +49,16 @@ jobs:
         uses: apache/skywalking-eyes/header@v0.6.0
       - name: Check rust code style
         run: cd python && make check-rust
-      - name: Set up Python ${{ matrix.python-version }}
+      - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: 3.8
           cache: pip
-          cache-dependency-path: pyproject.toml
-      - name: Install python linter dependencies
+          cache-dependency-path: ./python/pyproject.toml
+      - name: Check python
         working-directory: ./python
         run: |
           make setup-env
           source venv/bin/activate
-          pip install ruff mypy
-      - name: Run python linter
-        working-directory: ./python
-        run: |
-          source venv/bin/activate
+          make develop
           make check-python
diff --git a/python/Makefile b/python/Makefile
index be0131a7..a60205dd 100644
--- a/python/Makefile
+++ b/python/Makefile
@@ -50,10 +50,11 @@ check-rust: ## Run check on Rust
 
 .PHONY: check-python
 check-python: ## Run check on Python
-	$(info --- Check Python code quality ---)
-	pip install ruff mypy
+	$(info --- Check Python format ---)
+	ruff format --check --diff .
+	$(info --- Check Python linting ---)
 	ruff check .
-	ruff format .
+	$(info --- Check Python typing ---)
 	mypy .
 
 .PHONY: test-rust
diff --git a/python/hudi/__init__.py b/python/hudi/__init__.py
index 09a93399..b0a792e5 100644
--- a/python/hudi/__init__.py
+++ b/python/hudi/__init__.py
@@ -15,6 +15,6 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-from ._internal import __version__ as __version__
 from ._internal import HudiFileSlice as HudiFileSlice
 from ._internal import HudiTable as HudiTable
+from ._internal import __version__ as __version__
diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi
index 8f542b34..0f83aeec 100644
--- a/python/hudi/_internal.pyi
+++ b/python/hudi/_internal.pyi
@@ -15,9 +15,9 @@
 #  specific language governing permissions and limitations
 #  under the License.
 from dataclasses import dataclass
-from typing import Optional, Dict, List
+from typing import Dict, List, Optional
 
-import pyarrow
+import pyarrow  # type: ignore
 
 __version__: str
 
@@ -39,9 +39,9 @@ class HudiTable:
         table_uri: str,
         options: Optional[Dict[str, str]] = None,
     ): ...
-    def get_schema(self) -> 'pyarrow.Schema': ...
+    def get_schema(self) -> "pyarrow.Schema": ...
     def split_file_slices(self, n: int) -> List[List[HudiFileSlice]]: ...
     def get_file_slices(self) -> List[HudiFileSlice]: ...
     def read_file_slice(self, base_file_relative_path: str) -> pyarrow.RecordBatch: ...
-    def read_snapshot(self) -> List['pyarrow.RecordBatch']: ...
-    def read_snapshot_as_of(self, timestamp: str) -> List['pyarrow.RecordBatch']: ...
+    def read_snapshot(self) -> List["pyarrow.RecordBatch"]: ...
+    def read_snapshot_as_of(self, timestamp: str) -> List["pyarrow.RecordBatch"]: ...
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 874a2e06..367cf465 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -42,6 +42,8 @@ dependencies = [
 optional-dependencies = { devel = [
     "pytest",
     "coverage",
+    "ruff==0.5.2",
+    "mypy==1.10.1",
 ] }
 
 dynamic = ["version"]
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index 416bd90d..b1fd5668 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -23,18 +23,18 @@
 
 
 def _extract_testing_table(zip_file_path, target_path) -> str:
-    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
+    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
         zip_ref.extractall(target_path)
-    return os.path.join(target_path, 'trips_table')
+    return os.path.join(target_path, "trips_table")
 
 
 @pytest.fixture(
     params=[
-        '0.x_cow_partitioned',
+        "0.x_cow_partitioned",
     ]
 )
 def get_sample_table(request, tmp_path) -> str:
-    fixture_path = 'tests/table'
+    fixture_path = "tests/table"
     table_name = request.param
-    zip_file_path = Path(fixture_path).joinpath(f'{table_name}.zip')
+    zip_file_path = Path(fixture_path).joinpath(f"{table_name}.zip")
     return _extract_testing_table(zip_file_path, tmp_path)
diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py
index 2c100d1f..c3c84c9e 100644
--- a/python/tests/test_table_read.py
+++ b/python/tests/test_table_read.py
@@ -20,13 +20,13 @@
 
 from hudi import HudiTable
 
-PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split('.') if s.isnumeric()) < (
+PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (
     8,
     0,
     0,
 )
 pytestmark = pytest.mark.skipif(
-    PYARROW_LE_8_0_0, reason='hudi only supported if pyarrow >= 8.0.0'
+    PYARROW_LE_8_0_0, reason="hudi only supported if pyarrow >= 8.0.0"
 )
 
 
@@ -35,33 +35,33 @@ def test_sample_table(get_sample_table):
     table = HudiTable(table_path)
 
     assert table.get_schema().names == [
-        '_hoodie_commit_time',
-        '_hoodie_commit_seqno',
-        '_hoodie_record_key',
-        '_hoodie_partition_path',
-        '_hoodie_file_name',
-        'ts',
-        'uuid',
-        'rider',
-        'driver',
-        'fare',
-        'city',
+        "_hoodie_commit_time",
+        "_hoodie_commit_seqno",
+        "_hoodie_record_key",
+        "_hoodie_partition_path",
+        "_hoodie_file_name",
+        "ts",
+        "uuid",
+        "rider",
+        "driver",
+        "fare",
+        "city",
     ]
 
     file_slices = table.get_file_slices()
     assert len(file_slices) == 5
     assert set(f.commit_time for f in file_slices) == {
-        '20240402123035233',
-        '20240402144910683',
+        "20240402123035233",
+        "20240402144910683",
     }
     assert all(f.num_records == 1 for f in file_slices)
     file_slice_paths = [f.base_file_relative_path() for f in file_slices]
     assert set(file_slice_paths) == {
-        'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet',
-        'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet',
-        'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet',
-        'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet',
-        'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet',
+        "chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet",
+        "san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet",
+        "san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet",
+        "san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet",
+        "sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet",
     }
 
     batch = table.read_file_slice(file_slice_paths[0])
@@ -74,72 +74,72 @@ def test_sample_table(get_sample_table):
     assert len(next(file_slices_gen)) == 2
 
     batches = table.read_snapshot()
-    t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts')
+    t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts")
     assert t.to_pylist() == [
         {
-            '_hoodie_commit_time': '20240402144910683',
-            'ts': 1695046462179,
-            'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00',
-            'fare': 339.0,
+            "_hoodie_commit_time": "20240402144910683",
+            "ts": 1695046462179,
+            "uuid": "9909a8b1-2d15-4d3d-8ec9-efc48c536a00",
+            "fare": 339.0,
         },
         {
-            '_hoodie_commit_time': '20240402123035233',
-            'ts': 1695091554788,
-            'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721',
-            'fare': 27.7,
+            "_hoodie_commit_time": "20240402123035233",
+            "ts": 1695091554788,
+            "uuid": "e96c4396-3fad-413a-a942-4cb36106d721",
+            "fare": 27.7,
         },
         {
-            '_hoodie_commit_time': '20240402123035233',
-            'ts': 1695115999911,
-            'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa',
-            'fare': 17.85,
+            "_hoodie_commit_time": "20240402123035233",
+            "ts": 1695115999911,
+            "uuid": "c8abbe79-8d89-47ea-b4ce-4d224bae5bfa",
+            "fare": 17.85,
         },
         {
-            '_hoodie_commit_time': '20240402123035233',
-            'ts': 1695159649087,
-            'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330',
-            'fare': 19.1,
+            "_hoodie_commit_time": "20240402123035233",
+            "ts": 1695159649087,
+            "uuid": "334e26e9-8355-45cc-97c6-c31daf0df330",
+            "fare": 19.1,
         },
         {
-            '_hoodie_commit_time': '20240402123035233',
-            'ts': 1695516137016,
-            'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c',
-            'fare': 34.15,
+            "_hoodie_commit_time": "20240402123035233",
+            "ts": 1695516137016,
+            "uuid": "e3cf430c-889d-4015-bc98-59bdce1e530c",
+            "fare": 34.15,
         },
     ]
 
-    table = HudiTable(table_path, {'hoodie.read.as.of.timestamp': '20240402123035233'})
+    table = HudiTable(table_path, {"hoodie.read.as.of.timestamp": "20240402123035233"})
     batches = table.read_snapshot()
-    t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts')
+    t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts")
     assert t.to_pylist() == [
         {
-            '_hoodie_commit_time': '20240402123035233',
-            'ts': 1695046462179,
-            'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00',
-            'fare': 33.9,
+            "_hoodie_commit_time": "20240402123035233",
+            "ts": 1695046462179,
+            "uuid": "9909a8b1-2d15-4d3d-8ec9-efc48c536a00",
+            "fare": 33.9,
         },
         {
-            '_hoodie_commit_time': '20240402123035233',
-            'ts': 1695091554788,
-            'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721',
-            'fare': 27.7,
+            "_hoodie_commit_time": "20240402123035233",
+            "ts": 1695091554788,
+            "uuid": "e96c4396-3fad-413a-a942-4cb36106d721",
+            "fare": 27.7,
         },
         {
-            '_hoodie_commit_time': '20240402123035233',
-            'ts': 1695115999911,
-            'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa',
-            'fare': 17.85,
+            "_hoodie_commit_time": "20240402123035233",
+            "ts": 1695115999911,
+            "uuid": "c8abbe79-8d89-47ea-b4ce-4d224bae5bfa",
+            "fare": 17.85,
         },
         {
-            '_hoodie_commit_time': '20240402123035233',
-            'ts': 1695159649087,
-            'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330',
-            'fare': 19.1,
+            "_hoodie_commit_time": "20240402123035233",
+            "ts": 1695159649087,
+            "uuid": "334e26e9-8355-45cc-97c6-c31daf0df330",
+            "fare": 19.1,
         },
         {
-            '_hoodie_commit_time': '20240402123035233',
-            'ts': 1695516137016,
-            'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c',
-            'fare': 34.15,
+            "_hoodie_commit_time": "20240402123035233",
+            "ts": 1695516137016,
+            "uuid": "e3cf430c-889d-4015-bc98-59bdce1e530c",
+            "fare": 34.15,
         },
     ]

From 628baaee1ad8ad82318f694ba6ad54a0121da226 Mon Sep 17 00:00:00 2001
From: Shiyan Xu <2701446+xushiyan@users.noreply.github.com>
Date: Tue, 6 Aug 2024 20:42:00 -0500
Subject: [PATCH 4/4] revert workflow diff

---
 .github/workflows/compliance.yml | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml
index 9ca8b11c..dcc49737 100644
--- a/.github/workflows/compliance.yml
+++ b/.github/workflows/compliance.yml
@@ -49,16 +49,20 @@ jobs:
         uses: apache/skywalking-eyes/header@v0.6.0
       - name: Check rust code style
         run: cd python && make check-rust
-      - name: Setup Python
+      - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
         with:
-          python-version: 3.8
+          python-version: ${{ matrix.python-version }}
           cache: pip
-          cache-dependency-path: ./python/pyproject.toml
-      - name: Check python
+          cache-dependency-path: pyproject.toml
+      - name: Install python linter dependencies
         working-directory: ./python
         run: |
           make setup-env
           source venv/bin/activate
-          make develop
+          pip install ruff mypy
+      - name: Run python linter
+        working-directory: ./python
+        run: |
+          source venv/bin/activate
           make check-python