Under IPython, drop the startup script's own directory from sys.path so that
imports such as `import pandas` do not resolve to modules under `pyspark`
(SPARK-42266). Adds the `builtins` and `sys` imports this needs.
NOTE(review): line breaks and indentation were reconstructed from the hunk
headers; the last hunk appears cut off after `try:` (its header declares one
more trailing context line) -- confirm against the upstream commit.

diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index 8613d2d09ea4..86f576a30291 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -22,9 +22,11 @@
 """
 
 import atexit
+import builtins
 import os
 import platform
 import warnings
+import sys
 
 import pyspark
 from pyspark.context import SparkContext
@@ -33,6 +35,16 @@
 from pyspark.sql.utils import is_remote
 from urllib.parse import urlparse
 
+if getattr(builtins, "__IPYTHON__", False):
+    # (Only) during PYTHONSTARTUP execution, IPython temporarily adds the parent
+    # directory of the script into the Python path, which results in searching
+    # packages under `pyspark` directory.
+    # For example, `import pandas` attempts to import `pyspark.pandas`, see also SPARK-42266.
+    if "__file__" in globals():
+        parent_dir = os.path.abspath(os.path.dirname(__file__))
+        if parent_dir in sys.path:
+            sys.path.remove(parent_dir)
+
 if is_remote():
     try: