diff --git a/dev/check-codegen-python.py b/dev/check-codegen-python.py index 616b31eb4740..bcb2b0341da9 100755 --- a/dev/check-codegen-python.py +++ b/dev/check-codegen-python.py @@ -75,7 +75,9 @@ def check_connect_protos(): else: fail( "Generated files for pyspark-connect are out of sync! " - "Please run ./connector/connect/dev/generate_protos.sh" + "If you have touched files under connector/connect/src/main/protobuf, " + "please run ./connector/connect/dev/generate_protos.sh. " + "If you haven't touched any file above, please rebase your PR against main branch." ) diff --git a/python/pyspark/sql/connect/README.md b/python/pyspark/sql/connect/README.md index fa712677c120..dc40a50b84dc 100644 --- a/python/pyspark/sql/connect/README.md +++ b/python/pyspark/sql/connect/README.md @@ -49,3 +49,8 @@ To use the release version of Spark Connect: ./python/run-tests --testnames 'pyspark.sql.tests.connect.test_connect_basic' ``` +## Generate the protobuf files for the Python client +1. Install `buf` version 1.8.0: https://docs.buf.build/installation +2. Run `pip install grpcio==1.48.1 protobuf==4.21.6 mypy-protobuf==3.3.0` +3. Run `./connector/connect/dev/generate_protos.sh` +