2323import sys
2424import traceback
2525import time
26+ import gc
2627from errno import EINTR , ECHILD , EAGAIN
2728from socket import AF_INET , SOCK_STREAM , SOMAXCONN
2829from signal import SIGHUP , SIGTERM , SIGCHLD , SIG_DFL , SIG_IGN
@@ -42,43 +43,24 @@ def worker(sock):
4243 """
4344 Called by a worker process after the fork().
4445 """
45- # Redirect stdout to stderr
46- os .dup2 (2 , 1 )
47- sys .stdout = sys .stderr # The sys.stdout object is different from file descriptor 1
48-
4946 signal .signal (SIGHUP , SIG_DFL )
5047 signal .signal (SIGCHLD , SIG_DFL )
5148 signal .signal (SIGTERM , SIG_DFL )
5249
53- # Blocks until the socket is closed by draining the input stream
54- # until it raises an exception or returns EOF.
55- def waitSocketClose (sock ):
56- try :
57- while True :
58- # Empty string is returned upon EOF (and only then).
59- if sock .recv (4096 ) == '' :
60- return
61- except :
62- pass
63-
6450 # Read the socket using fdopen instead of socket.makefile() because the latter
6551 # seems to be very slow; note that we need to dup() the file descriptor because
6652 # otherwise writes also cause a seek that makes us miss data on the read side.
6753 infile = os .fdopen (os .dup (sock .fileno ()), "a+" , 65536 )
6854 outfile = os .fdopen (os .dup (sock .fileno ()), "a+" , 65536 )
6955 exit_code = 0
7056 try :
71- # Acknowledge that the fork was successful
72- write_int (os .getpid (), outfile )
73- outfile .flush ()
7457 worker_main (infile , outfile )
7558 except SystemExit as exc :
76- exit_code = exc .code
59+ exit_code = compute_real_exit_code ( exc .code )
7760 finally :
7861 outfile .flush ()
79- # The Scala side will close the socket upon task completion.
80- waitSocketClose (sock )
81- os ._exit (compute_real_exit_code (exit_code ))
62+ if exit_code :
63+ os ._exit (exit_code )
8264
8365
8466# Cleanup zombie children
@@ -102,6 +84,7 @@ def manager():
10284 listen_sock .listen (max (1024 , SOMAXCONN ))
10385 listen_host , listen_port = listen_sock .getsockname ()
10486 write_int (listen_port , sys .stdout )
87+ sys .stdout .flush ()
10588
10689 def shutdown (code ):
10790 signal .signal (SIGTERM , SIG_DFL )
@@ -114,8 +97,9 @@ def handle_sigterm(*args):
11497 signal .signal (SIGTERM , handle_sigterm ) # Gracefully exit on SIGTERM
11598 signal .signal (SIGHUP , SIG_IGN ) # Don't die on SIGHUP
11699
100+ reuse = os .environ .get ("SPARK_REUSE_WORKER" )
101+
117102 # Initialization complete
118- sys .stdout .close ()
119103 try :
120104 while True :
121105 try :
@@ -167,7 +151,19 @@ def handle_sigterm(*args):
167151 # in child process
168152 listen_sock .close ()
169153 try :
170- worker (sock )
154+ # Acknowledge that the fork was successful
155+ outfile = sock .makefile ("w" )
156+ write_int (os .getpid (), outfile )
157+ outfile .flush ()
158+ outfile .close ()
159+ while True :
160+ worker (sock )
161+ if not reuse :
162+ # wait for closing
163+ while sock .recv (1024 ):
164+ pass
165+ break
166+ gc .collect ()
171167 except :
172168 traceback .print_exc ()
173169 os ._exit (1 )
0 commit comments