-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-25253][PYSPARK] Refactor local connection & auth code #22247
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,7 @@ | |
| from __future__ import print_function | ||
| import socket | ||
|
|
||
| from pyspark.java_gateway import do_server_auth | ||
| from pyspark.java_gateway import local_connect_and_auth | ||
| from pyspark.serializers import write_int, UTF8Deserializer | ||
|
|
||
|
|
||
|
|
@@ -108,38 +108,12 @@ def _load_from_socket(port, auth_secret): | |
| """ | ||
| Load data from a given socket, this is a blocking method thus only return when the socket | ||
| connection has been closed. | ||
|
|
||
| This is copied from context.py, while modified the message protocol. | ||
| """ | ||
| sock = None | ||
| # Support for both IPv4 and IPv6. | ||
| # On most of IPv6-ready systems, IPv6 will take precedence. | ||
| for res in socket.getaddrinfo("localhost", port, socket.AF_UNSPEC, socket.SOCK_STREAM): | ||
| af, socktype, proto, canonname, sa = res | ||
| sock = socket.socket(af, socktype, proto) | ||
| try: | ||
| # Do not allow timeout for socket reading operation. | ||
| sock.settimeout(None) | ||
| sock.connect(sa) | ||
| except socket.error: | ||
| sock.close() | ||
| sock = None | ||
| continue | ||
| break | ||
| if not sock: | ||
| raise Exception("could not open socket") | ||
|
|
||
| # We don't really need a socket file here, it's just for convenience that we can reuse the | ||
| # do_server_auth() function and data serialization methods. | ||
| sockfile = sock.makefile("rwb", 65536) | ||
|
|
||
| (sockfile, sock) = local_connect_and_auth(port, auth_secret) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We must set sock timeout to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch, thanks! updated |
||
| # Make a barrier() function call. | ||
| write_int(BARRIER_FUNCTION, sockfile) | ||
| sockfile.flush() | ||
|
|
||
| # Do server auth. | ||
| do_server_auth(sockfile, auth_secret) | ||
|
|
||
| # Collect result. | ||
| res = UTF8Deserializer().loads(sockfile) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,7 +27,7 @@ | |
|
|
||
| from pyspark.accumulators import _accumulatorRegistry | ||
| from pyspark.broadcast import Broadcast, _broadcastRegistry | ||
| from pyspark.java_gateway import do_server_auth | ||
| from pyspark.java_gateway import local_connect_and_auth | ||
| from pyspark.taskcontext import BarrierTaskContext, TaskContext | ||
| from pyspark.files import SparkFiles | ||
| from pyspark.rdd import PythonEvalType | ||
|
|
@@ -364,8 +364,5 @@ def process(): | |
| # Read information about how to connect back to the JVM from the environment. | ||
| java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) | ||
| auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] | ||
| sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | ||
| sock.connect(("127.0.0.1", java_port)) | ||
| sock_file = sock.makefile("rwb", 65536) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @vanzin, BTW, did you test this on Windows too?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I quickly tested and seems working fine. Please ignore this comment. |
||
| do_server_auth(sock_file, auth_secret) | ||
| (sock_file, _) = local_connect_and_auth(java_port, auth_secret) | ||
| main(sock_file, sock_file) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Slight shorter (and more "python-compliant"?):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done