Skip to content

Commit

Permalink
Fix thread leak and deadlock on terminating slaves
Browse files: browse the repository at this point in the history
  • Loading branch information
seder committed Apr 24, 2015
1 parent ee26f67 commit aa7ba53
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ public synchronized void terminateJenkinsSlave(String name) {
if(request.request.slave.name.equals(name)) {
LOGGER.info("Removing enqueued mesos task " + name);
requests.remove(request);
// Also signal the Thread of the MesosComputerLauncher.launch() to exit from latch.await()
// Otherwise the Thread will stay in WAIT forever -> Leak!
request.result.failed(request.request.slave);
return;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import static hudson.util.TimeUnit2.MINUTES;

import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Logger;

import org.kohsuke.stapler.DataBoundConstructor;
Expand All @@ -35,6 +36,7 @@ public class MesosRetentionStrategy extends RetentionStrategy<MesosComputer> {
* terminated.
*/
public final int idleTerminationMinutes;
private ReentrantLock checkLock = new ReentrantLock(false);

private static final Logger LOGGER = Logger
.getLogger(MesosRetentionStrategy.class.getName());
Expand All @@ -43,8 +45,21 @@ public MesosRetentionStrategy(int idleTerminationMinutes) {
this.idleTerminationMinutes = idleTerminationMinutes;
}


@Override
public synchronized long check(MesosComputer c) {
public long check(MesosComputer c) {
    // Non-blocking guard: tryLock() returns immediately instead of waiting,
    // so a caller that cannot obtain checkLock is never parked here.
    final boolean acquired = checkLock.tryLock();
    if (acquired) {
        try {
            // The actual retention decision is delegated to checkInternal.
            return checkInternal(c);
        } finally {
            // Always release, even if checkInternal throws.
            checkLock.unlock();
        }
    }
    // Lock not available right now: skip this round and return 1
    // (NOTE(review): presumably "re-check in one minute" per the
    // RetentionStrategy contract -- confirm against the Jenkins Javadoc).
    return 1;
}

private long checkInternal(MesosComputer c) {
if (c.getNode() == null) {
return 1;
}
Expand Down

0 comments on commit aa7ba53

Please sign in to comment.