Skip to content

Commit

Permalink
[SPARK-42459][CONNECT] Create pyspark.sql.connect.utils to keep commo…
Browse files Browse the repository at this point in the history
…n codes

### What changes were proposed in this pull request?

This PR proposes to add `pyspark.sql.connect.utils` to keep common code, especially about dependencies.

### Why are the changes needed?

For example, [SPARK-41457](https://issues.apache.org/jira/browse/SPARK-41457) added `require_minimum_grpc_version` in `pyspark.sql.pandas.utils`, which is actually unrelated to the connect module. We should move all of this to a separate utils module for better readability and maintenance.

### Does this PR introduce _any_ user-facing change?

No, dev-only.

### How was this patch tested?

Existing tests should cover this.

Closes apache#40047 from HyukjinKwon/refactor-utils.

Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
  • Loading branch information
HyukjinKwon committed Feb 16, 2023
1 parent 5182664 commit 7ee8a32
Show file tree
Hide file tree
Showing 17 changed files with 68 additions and 56 deletions.
23 changes: 0 additions & 23 deletions python/pyspark/sql/connect/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,3 @@

"""Currently Spark Connect is very experimental and the APIs to interact with
Spark through this API can be changed at any time without warning."""
import sys

from pyspark.sql.pandas.utils import (
require_minimum_pandas_version,
require_minimum_pyarrow_version,
require_minimum_grpc_version,
)


def check_dependencies(mod_name: str, file_name: str) -> None:
    """Verify the optional Spark Connect dependencies, or skip doctests.

    When run as a doctest ``__main__``, skip the doctests entirely if the
    environment cannot exercise Spark Connect; otherwise require the minimum
    pandas, PyArrow, and gRPC versions (each raises ImportError if unmet).
    """
# Doctest entry point: decide whether the doctests can run at all.
if mod_name == "__main__":
from pyspark.testing.connectutils import should_test_connect, connect_requirement_message

if not should_test_connect:
# Connect cannot be tested here; report why and exit cleanly so the
# doctest runner treats this as a skip, not a failure.
print(
f"Skipping {file_name} doctests: {connect_requirement_message}",
file=sys.stderr,
)
sys.exit(0)
else:
# Normal import path: fail fast on missing/outdated dependencies.
require_minimum_pandas_version()
require_minimum_pyarrow_version()
require_minimum_grpc_version()
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"SparkConnectClient",
]

from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/readwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"""
User-defined function related classes and functions
"""
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
54 changes: 54 additions & 0 deletions python/pyspark/sql/connect/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys

from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version


def check_dependencies(mod_name: str, file_name: str) -> None:
    """Validate that the optional Spark Connect dependencies are usable.

    Invoked at the top of every Spark Connect module. Outside of a doctest
    ``__main__`` run it requires the minimum pandas, PyArrow, and gRPC
    versions (each check raises ImportError if unmet). During a doctest run
    it instead exits cleanly when Connect cannot be tested, so the doctests
    count as skipped rather than failed.
    """
    if mod_name != "__main__":
        # Regular import path: fail fast on missing or outdated dependencies.
        require_minimum_pandas_version()
        require_minimum_pyarrow_version()
        require_minimum_grpc_version()
        return

    # Doctest path: bail out gracefully when Connect cannot be tested.
    from pyspark.testing.connectutils import should_test_connect, connect_requirement_message

    if not should_test_connect:
        print(
            f"Skipping {file_name} doctests: {connect_requirement_message}",
            file=sys.stderr,
        )
        sys.exit(0)


def require_minimum_grpc_version() -> None:
"""Raise ImportError if minimum version of grpc is not installed"""
minimum_grpc_version = "1.48.1"

from distutils.version import LooseVersion

try:
import grpc
except ImportError as error:
raise ImportError(
"grpc >= %s must be installed; however, " "it was not found." % minimum_grpc_version
) from error
if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
raise ImportError(
"gRPC >= %s must be installed; however, "
"your version was %s." % (minimum_grpc_version, grpc.__version__)
)
2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect import check_dependencies
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__, __file__)

Expand Down
19 changes: 0 additions & 19 deletions python/pyspark/sql/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,25 +73,6 @@ def require_minimum_pyarrow_version() -> None:
)


def require_minimum_grpc_version() -> None:
"""Raise ImportError if minimum version of grpc is not installed"""
# Minimum gRPC version Spark Connect is tested against.
minimum_grpc_version = "1.48.1"

# NOTE(review): distutils is deprecated (PEP 632) and removed in
# Python 3.12 — consider migrating to packaging.version. TODO confirm.
from distutils.version import LooseVersion

try:
import grpc
except ImportError as error:
# Chain the original ImportError so the root cause stays visible.
raise ImportError(
"grpc >= %s must be installed; however, " "it was not found." % minimum_grpc_version
) from error
# grpc is importable but may still be older than the supported minimum.
if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
raise ImportError(
"gRPC >= %s must be installed; however, "
"your version was %s." % (minimum_grpc_version, grpc.__version__)
)


def pyarrow_version_less_than_minimum(minimum_pyarrow_version: str) -> bool:
"""Return False if the installed pyarrow version is less than minimum_pyarrow_version
or if pyarrow is not installed."""
Expand Down

0 comments on commit 7ee8a32

Please sign in to comment.