From b545ce2e3635bdfb4e36920c3d835f1ed229872f Mon Sep 17 00:00:00 2001 From: yanghao14 Date: Sun, 28 Jul 2024 16:47:37 +0800 Subject: [PATCH 1/4] style: enforce python code style (#5) - Add Ruff and MyPy configurations to pyproject.toml - Integrate Python linting in GitHub Actions - Format code with Ruff and Ruff Format --- .github/workflows/compliance.yml | 19 ++++- python/Makefile | 8 ++ python/hudi/_internal.pyi | 23 ++---- python/pyproject.toml | 24 ++++++ python/tests/conftest.py | 10 +-- python/tests/test_table_read.py | 134 +++++++++++++++++++++++-------- 6 files changed, 161 insertions(+), 57 deletions(-) diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml index 6f0e4dbe..dcc49737 100644 --- a/.github/workflows/compliance.yml +++ b/.github/workflows/compliance.yml @@ -47,5 +47,22 @@ jobs: - uses: actions/checkout@v4 - name: Check license header uses: apache/skywalking-eyes/header@v0.6.0 - - name: Check code style + - name: Check rust code style run: cd python && make check-rust + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: pyproject.toml + - name: Install python linter dependencies + working-directory: ./python + run: | + make setup-env + source venv/bin/activate + pip install ruff mypy + - name: Run python linter + working-directory: ./python + run: | + source venv/bin/activate + make check-python diff --git a/python/Makefile b/python/Makefile index 4badaca8..be0131a7 100644 --- a/python/Makefile +++ b/python/Makefile @@ -48,6 +48,14 @@ check-rust: ## Run check on Rust $(info --- Check Rust format ---) cargo fmt --all -- --check +.PHONY: check-python +check-python: ## Run check on Python + $(info --- Check Python code quality ---) + pip install ruff mypy + ruff check . + ruff format . + mypy . + .PHONY: test-rust test-rust: ## Run tests on Rust $(info --- Run Rust tests ---) diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi index fd97cc31..8f542b34 100644 --- a/python/hudi/_internal.pyi +++ b/python/hudi/_internal.pyi @@ -21,7 +21,6 @@ import pyarrow __version__: str - @dataclass(init=False) class HudiFileSlice: file_group_id: str @@ -33,24 +32,16 @@ class HudiFileSlice: def base_file_relative_path(self) -> str: ... - @dataclass(init=False) class HudiTable: - def __init__( - self, - table_uri: str, - options: Optional[Dict[str, str]] = None, + self, + table_uri: str, + options: Optional[Dict[str, str]] = None, ): ... - - def get_schema(self) -> "pyarrow.Schema": ... - + def get_schema(self) -> 'pyarrow.Schema': ... def split_file_slices(self, n: int) -> List[List[HudiFileSlice]]: ... - def get_file_slices(self) -> List[HudiFileSlice]: ... - - def read_file_slice(self, base_file_relative_path) -> pyarrow.RecordBatch: ... - - def read_snapshot(self) -> List["pyarrow.RecordBatch"]: ... - - def read_snapshot_as_of(self, timestamp: str) -> List["pyarrow.RecordBatch"]: ... + def read_file_slice(self, base_file_relative_path: str) -> pyarrow.RecordBatch: ... + def read_snapshot(self) -> List['pyarrow.RecordBatch']: ... + def read_snapshot_as_of(self, timestamp: str) -> List['pyarrow.RecordBatch']: ... diff --git a/python/pyproject.toml b/python/pyproject.toml index 36f350eb..b5ee5636 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -49,9 +49,33 @@ dynamic = ["version"] [tool.maturin] module-name = "hudi._internal" +[tool.ruff] +target-version = 'py38' +lint.mccabe = { max-complexity = 14 } +lint.flake8-quotes = {inline-quotes = 'single', multiline-quotes = 'double'} +lint.pydocstyle = { convention = 'google' } +format.quote-style = 'single' +lint.ignore = [ + "Q000", + "Q001", + "Q002", + "Q003", + "COM812", + "COM819", + "D104", + "I001", + "UP006", + "UP007", + "UP037", + "E501", # Formatted code may exceed the line length, leading to line-too-long (E501) errors. +] + [tool.mypy] files = "hudi/*.py" exclude = "^tests" +warn_unused_configs = true +ignore_missing_imports = true +strict = true [tool.pytest.ini_options] testpaths = [ diff --git a/python/tests/conftest.py b/python/tests/conftest.py index b1fd5668..416bd90d 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -23,18 +23,18 @@ def _extract_testing_table(zip_file_path, target_path) -> str: - with zipfile.ZipFile(zip_file_path, "r") as zip_ref: + with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: zip_ref.extractall(target_path) - return os.path.join(target_path, "trips_table") + return os.path.join(target_path, 'trips_table') @pytest.fixture( params=[ - "0.x_cow_partitioned", + '0.x_cow_partitioned', ] ) def get_sample_table(request, tmp_path) -> str: - fixture_path = "tests/table" + fixture_path = 'tests/table' table_name = request.param - zip_file_path = Path(fixture_path).joinpath(f"{table_name}.zip") + zip_file_path = Path(fixture_path).joinpath(f'{table_name}.zip') return _extract_testing_table(zip_file_path, tmp_path) diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py index e56463c5..2c100d1f 100644 --- a/python/tests/test_table_read.py +++ b/python/tests/test_table_read.py @@ -20,28 +20,49 @@ from hudi import HudiTable -PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0) -pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="hudi only supported if pyarrow >= 8.0.0") +PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split('.') if s.isnumeric()) < ( + 8, + 0, + 0, +) +pytestmark = pytest.mark.skipif( + PYARROW_LE_8_0_0, reason='hudi only supported if pyarrow >= 8.0.0' +) def test_sample_table(get_sample_table): table_path = get_sample_table table = HudiTable(table_path) - assert table.get_schema().names == ['_hoodie_commit_time', '_hoodie_commit_seqno', '_hoodie_record_key', - '_hoodie_partition_path', '_hoodie_file_name', 'ts', 'uuid', 'rider', 'driver', - 'fare', 'city'] + assert table.get_schema().names == [ + '_hoodie_commit_time', + '_hoodie_commit_seqno', + '_hoodie_record_key', + '_hoodie_partition_path', + '_hoodie_file_name', + 'ts', + 'uuid', + 'rider', + 'driver', + 'fare', + 'city', + ] file_slices = table.get_file_slices() assert len(file_slices) == 5 - assert set(f.commit_time for f in file_slices) == {'20240402123035233', '20240402144910683'} + assert set(f.commit_time for f in file_slices) == { + '20240402123035233', + '20240402144910683', + } assert all(f.num_records == 1 for f in file_slices) file_slice_paths = [f.base_file_relative_path() for f in file_slices] - assert set(file_slice_paths) == {'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet', - 'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet', - 'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet', - 'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet', - 'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet'} + assert set(file_slice_paths) == { + 'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet', + 'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet', + 'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet', + 'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet', + 'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet', + } batch = table.read_file_slice(file_slice_paths[0]) t = pa.Table.from_batches([batch]) @@ -53,29 +74,72 @@ def test_sample_table(get_sample_table): assert len(next(file_slices_gen)) == 2 batches = table.read_snapshot() - t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts") - assert t.to_pylist() == [{'_hoodie_commit_time': '20240402144910683', 'ts': 1695046462179, - 'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'fare': 339.0}, - {'_hoodie_commit_time': '20240402123035233', 'ts': 1695091554788, - 'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', 'fare': 27.7}, - {'_hoodie_commit_time': '20240402123035233', 'ts': 1695115999911, - 'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'fare': 17.85}, - {'_hoodie_commit_time': '20240402123035233', 'ts': 1695159649087, - 'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', 'fare': 19.1}, - {'_hoodie_commit_time': '20240402123035233', 'ts': 1695516137016, - 'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'fare': 34.15}] + t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts') + assert t.to_pylist() == [ + { + '_hoodie_commit_time': '20240402144910683', + 'ts': 1695046462179, + 'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', + 'fare': 339.0, + }, + { + '_hoodie_commit_time': '20240402123035233', + 'ts': 1695091554788, + 'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', + 'fare': 27.7, + }, + { + '_hoodie_commit_time': '20240402123035233', + 'ts': 1695115999911, + 'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', + 'fare': 17.85, + }, + { + '_hoodie_commit_time': '20240402123035233', + 'ts': 1695159649087, + 'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', + 'fare': 19.1, + }, + { + '_hoodie_commit_time': '20240402123035233', + 'ts': 1695516137016, + 'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', + 'fare': 34.15, + }, + ] - table = HudiTable(table_path, { - "hoodie.read.as.of.timestamp": "20240402123035233"}) + table = HudiTable(table_path, {'hoodie.read.as.of.timestamp': '20240402123035233'}) batches = table.read_snapshot() - t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts") - assert t.to_pylist() == [{'_hoodie_commit_time': '20240402123035233', 'ts': 1695046462179, - 'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'fare': 33.9}, - {'_hoodie_commit_time': '20240402123035233', 'ts': 1695091554788, - 'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', 'fare': 27.7}, - {'_hoodie_commit_time': '20240402123035233', 'ts': 1695115999911, - 'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'fare': 17.85}, - {'_hoodie_commit_time': '20240402123035233', 'ts': 1695159649087, - 'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', 'fare': 19.1}, - {'_hoodie_commit_time': '20240402123035233', 'ts': 1695516137016, - 'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'fare': 34.15}] + t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts') + assert t.to_pylist() == [ + { + '_hoodie_commit_time': '20240402123035233', + 'ts': 1695046462179, + 'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', + 'fare': 33.9, + }, + { + '_hoodie_commit_time': '20240402123035233', + 'ts': 1695091554788, + 'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', + 'fare': 27.7, + }, + { + '_hoodie_commit_time': '20240402123035233', + 'ts': 1695115999911, + 'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', + 'fare': 17.85, + }, + { + '_hoodie_commit_time': '20240402123035233', + 'ts': 1695159649087, + 'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', + 'fare': 19.1, + }, + { + '_hoodie_commit_time': '20240402123035233', + 'ts': 1695516137016, + 'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', + 'fare': 34.15, + }, + ] From d0e8a096d1948fadb91e2eef8f924464d70c72f9 Mon Sep 17 00:00:00 2001 From: Shiyan Xu <2701446+xushiyan@users.noreply.github.com> Date: Tue, 6 Aug 2024 19:58:11 -0500 Subject: [PATCH 2/4] update pyproject.toml for python style --- python/pyproject.toml | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index b5ee5636..874a2e06 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -51,30 +51,22 @@ module-name = "hudi._internal" [tool.ruff] target-version = 'py38' -lint.mccabe = { max-complexity = 14 } -lint.flake8-quotes = {inline-quotes = 'single', multiline-quotes = 'double'} -lint.pydocstyle = { convention = 'google' } -format.quote-style = 'single' -lint.ignore = [ - "Q000", - "Q001", - "Q002", - "Q003", - "COM812", - "COM819", - "D104", - "I001", - "UP006", - "UP007", - "UP037", - "E501", # Formatted code may exceed the line length, leading to line-too-long (E501) errors. +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +lint.select = [ + "E4", + "E7", + "E9", + "F", + # isort + "I", ] +# don't ignore any rule unless it becomes imperative +lint.ignore = [] +lint.isort.known-first-party = ["hudi"] [tool.mypy] files = "hudi/*.py" exclude = "^tests" -warn_unused_configs = true -ignore_missing_imports = true strict = true [tool.pytest.ini_options] From 57c70ad6ff576fe9e34fd7ed3d1b5718f4080ef3 Mon Sep 17 00:00:00 2001 From: Shiyan Xu <2701446+xushiyan@users.noreply.github.com> Date: Tue, 6 Aug 2024 20:39:10 -0500 Subject: [PATCH 3/4] fix configs --- .github/workflows/compliance.yml | 14 ++-- python/Makefile | 7 +- python/hudi/__init__.py | 2 +- python/hudi/_internal.pyi | 10 +-- python/pyproject.toml | 2 + python/tests/conftest.py | 10 +-- python/tests/test_table_read.py | 126 +++++++++++++++---------------- 7 files changed, 85 insertions(+), 86 deletions(-) diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml index dcc49737..9ca8b11c 100644 --- a/.github/workflows/compliance.yml +++ b/.github/workflows/compliance.yml @@ -49,20 +49,16 @@ jobs: uses: apache/skywalking-eyes/header@v0.6.0 - name: Check rust code style run: cd python && make check-rust - - name: Set up Python ${{ matrix.python-version }} + - name: Setup Python uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: 3.8 cache: pip - cache-dependency-path: pyproject.toml - - name: Install python linter dependencies + cache-dependency-path: ./python/pyproject.toml + - name: Check python working-directory: ./python run: | make setup-env source venv/bin/activate - pip install ruff mypy - - name: Run python linter - working-directory: ./python - run: | - source venv/bin/activate + make develop make check-python diff --git a/python/Makefile b/python/Makefile index be0131a7..a60205dd 100644 --- a/python/Makefile +++ b/python/Makefile @@ -50,10 +50,11 @@ check-rust: ## Run check on Rust .PHONY: check-python check-python: ## Run check on Python - $(info --- Check Python code quality ---) - pip install ruff mypy + $(info --- Check Python format ---) + ruff format --check --diff . + $(info --- Check Python linting ---) ruff check . - ruff format . + $(info --- Check Python typing ---) mypy . .PHONY: test-rust diff --git a/python/hudi/__init__.py b/python/hudi/__init__.py index 09a93399..b0a792e5 100644 --- a/python/hudi/__init__.py +++ b/python/hudi/__init__.py @@ -15,6 +15,6 @@ # specific language governing permissions and limitations # under the License. -from ._internal import __version__ as __version__ from ._internal import HudiFileSlice as HudiFileSlice from ._internal import HudiTable as HudiTable +from ._internal import __version__ as __version__ diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi index 8f542b34..0f83aeec 100644 --- a/python/hudi/_internal.pyi +++ b/python/hudi/_internal.pyi @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. from dataclasses import dataclass -from typing import Optional, Dict, List +from typing import Dict, List, Optional -import pyarrow +import pyarrow # type: ignore __version__: str @@ -39,9 +39,9 @@ class HudiTable: table_uri: str, options: Optional[Dict[str, str]] = None, ): ... - def get_schema(self) -> 'pyarrow.Schema': ... + def get_schema(self) -> "pyarrow.Schema": ... def split_file_slices(self, n: int) -> List[List[HudiFileSlice]]: ... def get_file_slices(self) -> List[HudiFileSlice]: ... def read_file_slice(self, base_file_relative_path: str) -> pyarrow.RecordBatch: ... - def read_snapshot(self) -> List['pyarrow.RecordBatch']: ... - def read_snapshot_as_of(self, timestamp: str) -> List['pyarrow.RecordBatch']: ... + def read_snapshot(self) -> List["pyarrow.RecordBatch"]: ... + def read_snapshot_as_of(self, timestamp: str) -> List["pyarrow.RecordBatch"]: ... diff --git a/python/pyproject.toml b/python/pyproject.toml index 874a2e06..367cf465 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -42,6 +42,8 @@ dependencies = [ optional-dependencies = { devel = [ "pytest", "coverage", + "ruff==0.5.2", + "mypy==1.10.1", ] } dynamic = ["version"] diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 416bd90d..b1fd5668 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -23,18 +23,18 @@ def _extract_testing_table(zip_file_path, target_path) -> str: - with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: + with zipfile.ZipFile(zip_file_path, "r") as zip_ref: zip_ref.extractall(target_path) - return os.path.join(target_path, 'trips_table') + return os.path.join(target_path, "trips_table") @pytest.fixture( params=[ - '0.x_cow_partitioned', + "0.x_cow_partitioned", ] ) def get_sample_table(request, tmp_path) -> str: - fixture_path = 'tests/table' + fixture_path = "tests/table" table_name = request.param - zip_file_path = Path(fixture_path).joinpath(f'{table_name}.zip') + zip_file_path = Path(fixture_path).joinpath(f"{table_name}.zip") return _extract_testing_table(zip_file_path, tmp_path) diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py index 2c100d1f..c3c84c9e 100644 --- a/python/tests/test_table_read.py +++ b/python/tests/test_table_read.py @@ -20,13 +20,13 @@ from hudi import HudiTable -PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split('.') if s.isnumeric()) < ( +PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < ( 8, 0, 0, ) pytestmark = pytest.mark.skipif( - PYARROW_LE_8_0_0, reason='hudi only supported if pyarrow >= 8.0.0' + PYARROW_LE_8_0_0, reason="hudi only supported if pyarrow >= 8.0.0" ) @@ -35,33 +35,33 @@ def test_sample_table(get_sample_table): table = HudiTable(table_path) assert table.get_schema().names == [ - '_hoodie_commit_time', - '_hoodie_commit_seqno', - '_hoodie_record_key', - '_hoodie_partition_path', - '_hoodie_file_name', - 'ts', - 'uuid', - 'rider', - 'driver', - 'fare', - 'city', + "_hoodie_commit_time", + "_hoodie_commit_seqno", + "_hoodie_record_key", + "_hoodie_partition_path", + "_hoodie_file_name", + "ts", + "uuid", + "rider", + "driver", + "fare", + "city", ] file_slices = table.get_file_slices() assert len(file_slices) == 5 assert set(f.commit_time for f in file_slices) == { - '20240402123035233', - '20240402144910683', + "20240402123035233", + "20240402144910683", } assert all(f.num_records == 1 for f in file_slices) file_slice_paths = [f.base_file_relative_path() for f in file_slices] assert set(file_slice_paths) == { - 'chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet', - 'san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet', - 'san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet', - 'san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet', - 'sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet', + "chennai/68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0_4-12-0_20240402123035233.parquet", + "san_francisco/d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0_1-9-0_20240402123035233.parquet", + "san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet", + "san_francisco/5a226868-2934-4f84-a16f-55124630c68d-0_0-7-24_20240402144910683.parquet", + "sao_paulo/ee915c68-d7f8-44f6-9759-e691add290d8-0_3-11-0_20240402123035233.parquet", } batch = table.read_file_slice(file_slice_paths[0]) @@ -74,72 +74,72 @@ def test_sample_table(get_sample_table): assert len(next(file_slices_gen)) == 2 batches = table.read_snapshot() - t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts') + t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts") assert t.to_pylist() == [ { - '_hoodie_commit_time': '20240402144910683', - 'ts': 1695046462179, - 'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', - 'fare': 339.0, + "_hoodie_commit_time": "20240402144910683", + "ts": 1695046462179, + "uuid": "9909a8b1-2d15-4d3d-8ec9-efc48c536a00", + "fare": 339.0, }, { - '_hoodie_commit_time': '20240402123035233', - 'ts': 1695091554788, - 'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', - 'fare': 27.7, + "_hoodie_commit_time": "20240402123035233", + "ts": 1695091554788, + "uuid": "e96c4396-3fad-413a-a942-4cb36106d721", + "fare": 27.7, }, { - '_hoodie_commit_time': '20240402123035233', - 'ts': 1695115999911, - 'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', - 'fare': 17.85, + "_hoodie_commit_time": "20240402123035233", + "ts": 1695115999911, + "uuid": "c8abbe79-8d89-47ea-b4ce-4d224bae5bfa", + "fare": 17.85, }, { - '_hoodie_commit_time': '20240402123035233', - 'ts': 1695159649087, - 'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', - 'fare': 19.1, + "_hoodie_commit_time": "20240402123035233", + "ts": 1695159649087, + "uuid": "334e26e9-8355-45cc-97c6-c31daf0df330", + "fare": 19.1, }, { - '_hoodie_commit_time': '20240402123035233', - 'ts': 1695516137016, - 'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', - 'fare': 34.15, + "_hoodie_commit_time": "20240402123035233", + "ts": 1695516137016, + "uuid": "e3cf430c-889d-4015-bc98-59bdce1e530c", + "fare": 34.15, }, ] - table = HudiTable(table_path, {'hoodie.read.as.of.timestamp': '20240402123035233'}) + table = HudiTable(table_path, {"hoodie.read.as.of.timestamp": "20240402123035233"}) batches = table.read_snapshot() - t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by('ts') + t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts") assert t.to_pylist() == [ { - '_hoodie_commit_time': '20240402123035233', - 'ts': 1695046462179, - 'uuid': '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', - 'fare': 33.9, + "_hoodie_commit_time": "20240402123035233", + "ts": 1695046462179, + "uuid": "9909a8b1-2d15-4d3d-8ec9-efc48c536a00", + "fare": 33.9, }, { - '_hoodie_commit_time': '20240402123035233', - 'ts': 1695091554788, - 'uuid': 'e96c4396-3fad-413a-a942-4cb36106d721', - 'fare': 27.7, + "_hoodie_commit_time": "20240402123035233", + "ts": 1695091554788, + "uuid": "e96c4396-3fad-413a-a942-4cb36106d721", + "fare": 27.7, }, { - '_hoodie_commit_time': '20240402123035233', - 'ts': 1695115999911, - 'uuid': 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', - 'fare': 17.85, + "_hoodie_commit_time": "20240402123035233", + "ts": 1695115999911, + "uuid": "c8abbe79-8d89-47ea-b4ce-4d224bae5bfa", + "fare": 17.85, }, { - '_hoodie_commit_time': '20240402123035233', - 'ts': 1695159649087, - 'uuid': '334e26e9-8355-45cc-97c6-c31daf0df330', - 'fare': 19.1, + "_hoodie_commit_time": "20240402123035233", + "ts": 1695159649087, + "uuid": "334e26e9-8355-45cc-97c6-c31daf0df330", + "fare": 19.1, }, { - '_hoodie_commit_time': '20240402123035233', - 'ts': 1695516137016, - 'uuid': 'e3cf430c-889d-4015-bc98-59bdce1e530c', - 'fare': 34.15, + "_hoodie_commit_time": "20240402123035233", + "ts": 1695516137016, + "uuid": "e3cf430c-889d-4015-bc98-59bdce1e530c", + "fare": 34.15, }, ] From 628baaee1ad8ad82318f694ba6ad54a0121da226 Mon Sep 17 00:00:00 2001 From: Shiyan Xu <2701446+xushiyan@users.noreply.github.com> Date: Tue, 6 Aug 2024 20:42:00 -0500 Subject: [PATCH 4/4] reverse workflow diff --- .github/workflows/compliance.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml index 9ca8b11c..dcc49737 100644 --- a/.github/workflows/compliance.yml +++ b/.github/workflows/compliance.yml @@ -49,16 +49,20 @@ jobs: uses: apache/skywalking-eyes/header@v0.6.0 - name: Check rust code style run: cd python && make check-rust - - name: Setup Python + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: ${{ matrix.python-version }} cache: pip - cache-dependency-path: ./python/pyproject.toml - - name: Check python + cache-dependency-path: pyproject.toml + - name: Install python linter dependencies working-directory: ./python run: | make setup-env source venv/bin/activate - make develop + pip install ruff mypy + - name: Run python linter + working-directory: ./python + run: | + source venv/bin/activate make check-python