Skip to content

Commit 35c98d0

Browse files
committed
Revert "Let workers finish lost races without delaying dynamic execution."
This reverts commit 6080c1e.
1 parent 988b56f commit 35c98d0

File tree

5 files changed

+23
-73
lines changed

5 files changed

+23
-73
lines changed

src/main/java/com/google/devtools/build/lib/worker/SingleplexWorker.java

+18-6
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,24 @@ void putRequest(WorkRequest request) throws IOException {
120120
@Override
121121
WorkResponse getResponse(int requestId) throws IOException, InterruptedException {
122122
recordingInputStream.startRecording(4096);
123-
while (recordingInputStream.available() == 0) {
124-
Thread.sleep(10);
125-
if (!process.isAlive()) {
126-
throw new IOException(
127-
String.format(
128-
"Worker process for %s died while waiting for response", workerKey.getMnemonic()));
123+
// Ironically, we don't allow interrupts during dynamic execution, since we can't cancel
124+
// the worker short of destroying it.
125+
if (!workerKey.isSpeculative()) {
126+
while (recordingInputStream.available() == 0) {
127+
try {
128+
Thread.sleep(10);
129+
} catch (InterruptedException e) {
130+
// This should only happen when not in dynamic execution, so we can safely kill the
131+
// worker.
132+
destroy();
133+
throw e;
134+
}
135+
if (!process.isAlive()) {
136+
throw new IOException(
137+
String.format(
138+
"Worker process for %s died while waiting for response",
139+
workerKey.getMnemonic()));
140+
}
129141
}
130142
}
131143
return workerProtocol.getResponse();

src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java

-39
Original file line numberDiff line numberDiff line change
@@ -457,10 +457,6 @@ WorkResponse execInWorker(
457457

458458
try {
459459
response = worker.getResponse(request.getRequestId());
460-
} catch (InterruptedException e) {
461-
finishWorkAsync(key, worker, request);
462-
worker = null;
463-
throw e;
464460
} catch (IOException e) {
465461
restoreInterrupt(e);
466462
// If protobuf or json reader couldn't parse the response, try to print whatever the
@@ -518,41 +514,6 @@ WorkResponse execInWorker(
518514
return response;
519515
}
520516

521-
/**
522-
* Starts a thread to collect the response from a worker when it's no longer of interest.
523-
*
524-
* <p>This can happen either when we lost the race in dynamic execution or the build got
525-
* interrupted. This takes ownership of the worker for purposes of returning it to the worker
526-
* pool.
527-
*/
528-
private void finishWorkAsync(WorkerKey key, Worker worker, WorkRequest request) {
529-
Thread reaper =
530-
new Thread(
531-
() -> {
532-
Worker w = worker;
533-
try {
534-
w.getResponse(request.getRequestId());
535-
} catch (IOException | InterruptedException e1) {
536-
// If this happens, we either can't trust the output of the worker, or we got
537-
// interrupted while handling being interrupted. In the latter case, let's stop
538-
// trying and just destroy the worker. If it's a singleplex worker, there will
539-
// be a dangling response that we don't want to keep trying to read, so we destroy
540-
// the worker.
541-
try {
542-
workers.invalidateObject(key, w);
543-
w = null;
544-
} catch (IOException | InterruptedException e2) {
545-
// The reaper thread can't do anything useful about this.
546-
}
547-
} finally {
548-
if (w != null) {
549-
workers.returnObject(key, w);
550-
}
551-
}
552-
});
553-
reaper.start();
554-
}
555-
556517
private static void restoreInterrupt(IOException e) {
557518
if (e instanceof InterruptedIOException) {
558519
Thread.currentThread().interrupt();

src/test/java/com/google/devtools/build/lib/worker/ExampleWorker.java

-20
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,8 @@
3535
import java.util.Map;
3636
import java.util.Random;
3737
import java.util.UUID;
38-
import java.util.concurrent.Semaphore;
3938
import java.util.regex.Matcher;
4039
import java.util.regex.Pattern;
41-
import sun.misc.Signal;
42-
import sun.misc.SignalHandler;
4340

4441
/** An example implementation of a worker process that is used for integration tests. */
4542
public class ExampleWorker {
@@ -82,23 +79,6 @@ private static void runPersistentWorker(ExampleWorkerOptions workerOptions) thro
8279
PrintStream originalStdOut = System.out;
8380
PrintStream originalStdErr = System.err;
8481

85-
if (workerOptions.waitForSignal) {
86-
Semaphore signalSem = new Semaphore(0);
87-
Signal.handle(
88-
new Signal("HUP"),
89-
new SignalHandler() {
90-
@Override
91-
public void handle(Signal sig) {
92-
signalSem.release();
93-
}
94-
});
95-
try {
96-
signalSem.acquire();
97-
} catch (InterruptedException e) {
98-
System.out.println("Interrupted while waiting for signal");
99-
e.printStackTrace();
100-
}
101-
}
10282
ExampleWorkerProtocol workerProtocol = null;
10383
switch (workerOptions.workerProtocol) {
10484
case JSON:

src/test/java/com/google/devtools/build/lib/worker/ExampleWorkerOptions.java

-8
Original file line numberDiff line numberDiff line change
@@ -135,14 +135,6 @@ public static class ExampleWorkOptions extends OptionsBase {
135135
)
136136
public boolean hardPoison;
137137

138-
@Option(
139-
name = "wait_for_signal",
140-
documentationCategory = OptionDocumentationCategory.UNCATEGORIZED,
141-
effectTags = {OptionEffectTag.NO_OP},
142-
defaultValue = "false",
143-
help = "Don't send a response until receiving a SIGXXXX.")
144-
public boolean waitForSignal;
145-
146138
/** Enum converter for --worker_protocol. */
147139
public static class WorkerProtocolEnumConverter
148140
extends EnumConverter<ExecutionRequirements.WorkerProtocolFormat> {

src/test/java/com/google/devtools/build/lib/worker/WorkerTest.java

+5
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,11 @@ private void verifyGetResponseFailure(String responseString, String expectedErro
174174
assertThat(ex).hasMessageThat().contains(expectedError);
175175
}
176176

177+
@Test
178+
public void testGetResponse_json_emptyString_throws() throws IOException {
179+
verifyGetResponseFailure("", "Could not parse json work request correctly");
180+
}
181+
177182
@Test
178183
public void testGetResponse_badJson_throws() throws IOException {
179184
verifyGetResponseFailure(

0 commit comments

Comments
 (0)