@@ -778,15 +778,6 @@ notify_empty(rv::RemoteValue) = notify(rv.empty)
778
778
779
779
## message event handlers ##
780
780
781
- # activity on accept fd
782
- function accept_handler (server:: TCPServer , status:: Int32 )
783
- if status == - 1
784
- error (" an error occured during the creation of the server" )
785
- end
786
- client = accept_nonblock (server)
787
- process_messages (client, client)
788
- end
789
-
790
781
# Two-stream convenience method: forwards to the three-argument method,
# supplying `nothing` for `rr_ntfy_join`.
function process_messages(r_stream::TCPSocket, w_stream::TCPSocket)
    return process_messages(r_stream, w_stream, nothing)
end
791
782
# Hand the stream pair to `process_tcp_streams` through `@schedule`, passing
# `rr_ntfy_join` along unchanged. The value of the `@schedule` expression is
# what this method returns.
function process_messages(r_stream::TCPSocket, w_stream::TCPSocket, rr_ntfy_join)
    return @schedule process_tcp_streams(r_stream, w_stream, rr_ntfy_join)
end
792
783
@@ -903,29 +894,32 @@ function message_handler_loop(r_stream::AsyncStream, w_stream::AsyncStream, rr_n
903
894
end # end of while
904
895
catch e
905
896
iderr = worker_id_from_socket (r_stream)
906
- werr = worker_from_id (iderr)
907
- oldstate = werr. state
908
- set_worker_state (werr, W_TERMINATED)
909
-
910
-
911
- # If error occured talking to pid 1, commit harakiri
912
- if iderr == 1
913
- if isopen (w_stream)
914
- print (STDERR, " fatal error on " , myid (), " : " )
915
- display_error (e, catch_backtrace ())
897
+ if (iderr < 1 )
898
+ print (STDERR, " Socket from unknown remote worker in worker " , myid ())
899
+ else
900
+ werr = worker_from_id (iderr)
901
+ oldstate = werr. state
902
+ set_worker_state (werr, W_TERMINATED)
903
+
904
+ # If error occurred talking to pid 1, commit harakiri
905
+ if iderr == 1
906
+ if isopen (w_stream)
907
+ print (STDERR, " fatal error on " , myid (), " : " )
908
+ display_error (e, catch_backtrace ())
909
+ end
910
+ exit (1 )
916
911
end
917
- exit (1 )
918
- end
919
912
920
- # Will treat any exception as death of node and cleanup
921
- # since currently we do not have a mechanism for workers to reconnect
922
- # to each other on unhandled errors
923
- deregister_worker (iderr)
913
+ # Will treat any exception as death of node and cleanup
914
+ # since currently we do not have a mechanism for workers to reconnect
915
+ # to each other on unhandled errors
916
+ deregister_worker (iderr)
917
+ end
924
918
925
919
if isopen (r_stream) close (r_stream) end
926
920
if isopen (w_stream) close (w_stream) end
927
921
928
- if (myid () == 1 )
922
+ if (myid () == 1 ) && (iderr > 1 )
929
923
if oldstate != W_TERMINATING
930
924
println (STDERR, " Worker $iderr terminated." )
931
925
rethrow (e)
@@ -977,7 +971,17 @@ function start_worker(out::IO)
977
971
else
978
972
sock = listen (LPROC. bind_port)
979
973
end
980
- sock. ccb = accept_handler
974
+ @schedule begin
975
+ while true
976
+ try
977
+ client = accept (sock)
978
+ process_messages (client, client)
979
+ catch e
980
+ println (STDERR, " Error in accept() : " , e)
981
+ break
982
+ end
983
+ end
984
+ end
981
985
print (out, " julia_worker:" ) # print header
982
986
print (out, " $(dec (LPROC. bind_port)) #" ) # print port
983
987
print (out, LPROC. bind_addr)
0 commit comments