Skip to content

Commit 972366b

Browse files
authored
Merge pull request #597 from vlm/small-file-limit
Fix "Too many open files" if too small of a system limit is encountered
2 parents b075db1 + 9e12eb4 commit 972366b

File tree

8 files changed

+250
-35
lines changed

8 files changed

+250
-35
lines changed

include/llbuild/Basic/PlatformUtility.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ std::string makeTmpDir();
5353
// Return a string containing all valid path separators on the current platform
5454
std::string getPathSeparators();
5555

56+
/// Gets the max open file limit for the current process.
57+
/// Returns: 0 on failure, otherwise the max number of open files.
58+
llbuild_rlim_t getOpenFileLimit();
59+
5660
/// Sets the max open file limit to min(max(soft_limit, limit), hard_limit),
5761
/// where soft_limit and hard_limit are gathered from the system.
5862
///

lib/Basic/LaneBasedExecutionQueue.cpp

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llbuild/Basic/ExecutionQueue.h"
14+
#include "llbuild/Basic/PlatformUtility.h"
1415

1516
#include "llbuild/Basic/Tracing.h"
1617

@@ -195,22 +196,16 @@ class LaneBasedExecutionQueue : public ExecutionQueue {
195196

196197
public:
197198
LaneBasedExecutionQueue(ExecutionQueueDelegate& delegate,
198-
unsigned numLanes, SchedulerAlgorithm alg,
199+
unsigned numLanesSuggestion, SchedulerAlgorithm alg,
199200
const char* const* environment)
200-
: ExecutionQueue(delegate), buildID(std::random_device()()), numLanes(numLanes),
201+
: ExecutionQueue(delegate), buildID(std::random_device()()),
201202
readyJobs(Scheduler::make(alg)), environment(environment)
202203
{
203-
// Configure the background task maximum. We currently support an
204-
// environmental override for experimentation pursposes, but otherwise limit
205-
// to a modest multiple of the core count, since we currently burn one thread
206-
// per background task.
207-
char *p = getenv("LLBUILD_BACKGROUND_TASK_MAX");
208-
if (p && !StringRef(p).getAsInteger(10, backgroundTaskMax)) {
209-
// Parsed.
210-
} else {
211-
backgroundTaskMax = std::min(1024U, numLanes * 64U);
212-
}
213-
204+
205+
auto taskLimits = estimateTaskLimits(numLanesSuggestion);
206+
numLanes = taskLimits.first;
207+
backgroundTaskMax = taskLimits.second;
208+
214209
for (unsigned i = 0; i != numLanes; ++i) {
215210
lanes.push_back(std::unique_ptr<std::thread>(
216211
new std::thread(
@@ -243,6 +238,52 @@ class LaneBasedExecutionQueue : public ExecutionQueue {
243238
}
244239
}
245240

241+
/// Returns the number of allowed foreground and background tasks.
242+
static auto estimateTaskLimits(unsigned numLanes) -> std::pair<unsigned, unsigned> {
243+
llbuild_rlim_t curOpenFileLimit = llbuild::basic::sys::getOpenFileLimit();
244+
const unsigned reservedFileCount = (STDERR_FILENO+1) + 2 /* Database */
245+
+ 1 /* Logging */
246+
+ 2 /* Additional fds during spawn */
247+
+ 2 /* Fudge factor */;
248+
if (curOpenFileLimit < reservedFileCount) {
249+
assert(curOpenFileLimit < reservedFileCount);
250+
// Certainly can't afford background tasks.
251+
// Maybe even can't afford building altogether, but let's risk it.
252+
return std::make_pair(1, 0);
253+
}
254+
255+
unsigned allowedFilesForTasks = static_cast<unsigned>(std::min(curOpenFileLimit, static_cast<llbuild_rlim_t>(INT_MAX))) - reservedFileCount;
256+
unsigned filesPerTask = 2; // A task has output [and control] file descriptors.
257+
unsigned maxConcurrentTasks = allowedFilesForTasks / filesPerTask;
258+
259+
if (numLanes > maxConcurrentTasks) {
260+
// Can't afford background tasks, and maybe won't even support
261+
// the full extent of requested concurrency.
262+
numLanes = std::max(1u, maxConcurrentTasks);
263+
return std::make_pair(numLanes, 0);
264+
}
265+
266+
// Number of tasks that can be run, according to open file limits.
267+
unsigned extraTasksMax = maxConcurrentTasks - numLanes;
268+
269+
// Configure the background task maximum. We currently support an
270+
// environmental override for experimentation purposes, but otherwise
271+
// limit to a modest multiple of the core count, since we currently burn
272+
// one thread per background task.
273+
unsigned backgroundTaskMax = 0;
274+
char *p = getenv("LLBUILD_BACKGROUND_TASK_MAX");
275+
if (p && !StringRef(p).getAsInteger(10, backgroundTaskMax)) {
276+
// Parsed.
277+
} else {
278+
backgroundTaskMax = std::min(1024U, numLanes * 64U);
279+
}
280+
281+
// The number of background can't exceed available concurrency.
282+
backgroundTaskMax = std::min(backgroundTaskMax, extraTasksMax);
283+
284+
return std::make_pair(numLanes, backgroundTaskMax);
285+
}
286+
246287
virtual void addJob(QueueJob job) override {
247288
uint64_t readyJobsCount;
248289
{
@@ -328,22 +369,15 @@ class LaneBasedExecutionQueue : public ExecutionQueue {
328369
handle.id = context.jobID;
329370

330371
ProcessReleaseFn releaseFn = [this](std::function<void()>&& processWait) {
331-
bool releaseAllowed = false;
332-
// This check is not guaranteed to prevent more than backgroundTaskMax
333-
// tasks from releasing. We could race between the check and increment and
334-
// thus have a few extra. However, for our purposes, this should be fine.
335-
// The cap is primarly intended to prevent runaway explosions of tasks.
336-
if (backgroundTaskCount < backgroundTaskMax) {
337-
backgroundTaskCount++;
338-
releaseAllowed = true;
339-
}
340-
if (releaseAllowed) {
372+
auto previousTaskCount = backgroundTaskCount.fetch_add(1);
373+
if (previousTaskCount < backgroundTaskMax) {
341374
// Launch the process wait on a detached thread
342375
std::thread([this, processWait=std::move(processWait)]() mutable {
343376
processWait();
344377
backgroundTaskCount--;
345378
}).detach();
346379
} else {
380+
backgroundTaskCount--;
347381
// not allowed to release, call wait directly
348382
processWait();
349383
}

lib/Basic/PlatformUtility.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,22 @@ int sys::write(int fileHandle, void *destinationBuffer,
212212
#endif
213213
}
214214

215+
// Get the current process' open file limit. Returns -1 on failure.
216+
llbuild_rlim_t sys::getOpenFileLimit() {
217+
#if defined(_WIN32)
218+
int value = _getmaxstdio();
219+
return std::min(0, value);
220+
#else
221+
struct rlimit rl;
222+
int ret = getrlimit(RLIMIT_NOFILE, &rl);
223+
if (ret != 0) {
224+
return 0;
225+
}
226+
227+
return rl.rlim_cur;
228+
#endif
229+
}
230+
215231
// Raise the open file limit, returns 0 on success, -1 on failure
216232
int sys::raiseOpenFileLimit(llbuild_rlim_t limit) {
217233
#if defined(_WIN32)

tests/Ninja/Build/Inputs/close-control-fd

Lines changed: 0 additions & 10 deletions
This file was deleted.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env python
2+
3+
# Close the control channel and run a given command.
4+
# Example usage:
5+
#
6+
# > exec ./run-releasing-control-fd sleep 10
7+
#
8+
# The `exec` prefix is necessary to prevent the file descriptor being held
9+
# by the outer shell while running the `run-releasing-control-fd`.
10+
11+
from __future__ import print_function
12+
import subprocess
13+
import sys
14+
import os
15+
16+
def usage():
    """Print the expected command-line shape and exit with status 1."""
    message = "Expected: [--require-control-fd] <command> [args...]"
    print(message)
    raise SystemExit(1)
19+
20+
def main(argv):
    """Close the llbuild control descriptor, if any, then run a command.

    argv is the full argument vector: argv[0] is the script name, optionally
    followed by "--require-control-fd", then the command and its arguments.
    With "--require-control-fd", a missing LLBUILD_CONTROL_FD environment
    variable is reported on stderr and the script exits with status 1.
    """
    # Parse command line options.
    if len(argv) <= 1:
        usage()

    first_arg = argv[1]
    require_control_fd = (first_arg == "--require-control-fd")
    if require_control_fd:
        if len(argv) == 2:
            usage()
        exec_args = argv[2:]
    else:
        if first_arg.startswith("-"):
            usage()
        exec_args = argv[1:]

    # Close the control descriptor, if given, and hide it from the child.
    control_fd = os.environ.get("LLBUILD_CONTROL_FD")
    if control_fd is not None:
        del os.environ["LLBUILD_CONTROL_FD"]
        os.close(int(control_fd))
    elif require_control_fd:
        print("%s: missing LLBUILD_CONTROL_FD" % exec_args[0], file=sys.stderr)
        sys.exit(1)

    # A single argument containing a space is treated as a shell command line.
    run_via_shell = (len(exec_args) == 1) and (' ' in exec_args[0])
    subprocess.check_call(exec_args, shell=run_via_shell)
47+
48+
# Script entry point: call main() with the process arguments only when this
# file is executed directly.
if __name__ == '__main__':
49+
main(sys.argv)

tests/Ninja/Build/Inputs/ulimited

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env python
2+
3+
# Example usage:
4+
# > ./ulimited -n 50 echo foo
5+
# Prior RLIMIT_NOFILE: soft=256, hard=RLIM_INFINITY
6+
# Reset RLIMIT_NOFILE: soft=50, hard=RLIM_INFINITY
7+
# foo
8+
# >
9+
10+
11+
from __future__ import print_function
12+
import errno
13+
import subprocess
14+
import resource
15+
import sys
16+
import os
17+
18+
19+
def main(argv):
    """Run a command under a new soft RLIMIT_NOFILE limit.

    argv is the full argument vector: argv[0] is the script name, followed
    by "-n", the new limit, the command executable and its arguments.

    Exits with status 1 on malformed usage, os.EX_USAGE when the limit is
    not an integer, and os.EX_OSERR when the command cannot be executed.
    On success the process is replaced and this function never returns.
    """
    if len(argv) < 4 or argv[1] != "-n":
        print("Expected: -n <n> <command-exe> [args...]")
        sys.exit(1)

    try:
        new_limit = int(argv[2])
    except ValueError as e:
        print("-n: %s" % e)
        sys.exit(os.EX_USAGE)

    # os.execvp() does not return on success and raises OSError on failure,
    # so the failure has to be caught for the diagnostic below to be reached.
    failure = None
    try:
        execute_with_limit(new_limit, argv[3:])
    except OSError as e:
        failure = e

    if failure is None:
        print("%s: Cannot execute" % argv[3], file=sys.stderr)
    else:
        print("%s: Cannot execute: %s" % (argv[3], failure), file=sys.stderr)
    sys.exit(os.EX_OSERR)
34+
35+
36+
def execute_with_limit(new_limit, arguments):
    """Set the soft RLIMIT_NOFILE to new_limit and exec the given command.

    The hard limit is left as it was. The limits are reported before and
    after the change via get_and_show_limit(). arguments[0] is the program
    to execute and arguments is its full argument vector.
    """
    _, hard_limit = get_and_show_limit("Prior", "RLIMIT_NOFILE")
    resource.setrlimit(resource.RLIMIT_NOFILE, [new_limit, hard_limit])
    get_and_show_limit("Reset", "RLIMIT_NOFILE")

    program = arguments[0]
    os.execvp(program, arguments)
41+
42+
43+
def stringify_limit(limit):
    """Return "RLIM_INFINITY" for the unlimited value, else limit unchanged."""
    return "RLIM_INFINITY" if limit == resource.RLIM_INFINITY else limit
47+
48+
49+
def get_and_show_limit(prefix, name):
    """Print the named resource limit to stderr and return (soft, hard).

    name is the attribute name of the limit in the resource module, for
    example "RLIMIT_NOFILE"; prefix labels the printed line.
    """
    soft, hard = resource.getrlimit(getattr(resource, name))
    soft_text, hard_text = [stringify_limit(value) for value in (soft, hard)]
    report = "%s %s: {soft: %s, hard: %s}" % (prefix, name, soft_text, hard_text)
    print(report, file=sys.stderr)
    return (soft, hard)
56+
57+
58+
# Script entry point: call main() with the process arguments only when this
# file is executed directly.
if __name__ == '__main__':
59+
main(sys.argv)

tests/Ninja/Build/console-pool-no-control-fd.ninja

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# RUN: rm -rf %t.build
44
# RUN: mkdir -p %t.build
55
# RUN: cp %s %t.build/build.ninja
6-
# RUN: cp %S/Inputs/close-control-fd %t.build
6+
# RUN: cp %S/Inputs/run-releasing-control-fd %t.build
77
# RUN: cp %S/Inputs/wait-for-file %t.build
88
# RUN: %{llbuild} ninja build --jobs 3 --no-db --chdir %t.build &> %t.out
99
# RUN: %{FileCheck} < %t.out %s
@@ -22,7 +22,7 @@ rule CUSTOM
2222
command = ${COMMAND}
2323

2424
build first: CUSTOM
25-
command = touch executing && . ./close-control-fd && ./wait-for-file stop && rm -f executing
25+
command = touch executing && exec ./run-releasing-control-fd "./wait-for-file stop && rm -f executing"
2626

2727
build second: CUSTOM
2828
command = test ! -f executing

tests/Ninja/Build/file-limit.ninja

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Check operation under max file descriptor constraints
2+
3+
# RUN: rm -rf %t.build
4+
# RUN: mkdir -p %t.build
5+
# RUN: cp %s %t.build/build.ninja
6+
# RUN: cp %S/Inputs/run-releasing-control-fd %t.build
7+
# RUN: cp %S/Inputs/ulimited %t.build
8+
# RUN: %t.build/ulimited -n 30 %{llbuild} ninja build --jobs 02 --chdir %t.build &> %t.out
9+
# RUN: %t.build/ulimited -n 40 %{llbuild} ninja build --jobs 04 --chdir %t.build &> %t.out
10+
# RUN: %t.build/ulimited -n 60 %{llbuild} ninja build --jobs 10 --chdir %t.build &> %t.out
11+
# RUN: %t.build/ulimited -n 80 %{llbuild} ninja build --jobs 10 --chdir %t.build &> %t.out
12+
#
13+
rule WAIT
14+
command = exec ./run-releasing-control-fd "sleep 0.1"
15+
16+
build 00: WAIT
17+
build 01: WAIT
18+
build 02: WAIT
19+
build 03: WAIT
20+
build 04: WAIT
21+
build 05: WAIT
22+
build 06: WAIT
23+
build 07: WAIT
24+
build 08: WAIT
25+
build 09: WAIT
26+
build 10: WAIT
27+
build 11: WAIT
28+
build 12: WAIT
29+
build 13: WAIT
30+
build 14: WAIT
31+
build 15: WAIT
32+
build 16: WAIT
33+
build 17: WAIT
34+
build 18: WAIT
35+
build 19: WAIT
36+
build 20: WAIT
37+
build 21: WAIT
38+
build 22: WAIT
39+
build 23: WAIT
40+
build 24: WAIT
41+
build 25: WAIT
42+
build 26: WAIT
43+
build 27: WAIT
44+
build 28: WAIT
45+
build 29: WAIT
46+
build 30: WAIT
47+
build 31: WAIT
48+
build 32: WAIT
49+
build 33: WAIT
50+
build 34: WAIT
51+
build 35: WAIT
52+
build 36: WAIT
53+
build 37: WAIT
54+
build 38: WAIT
55+
build 39: WAIT
56+
57+
build output: phony $
58+
00 01 02 03 04 05 06 07 08 09 $
59+
10 11 12 13 14 15 16 17 18 19 $
60+
20 21 22 23 24 25 26 27 28 29 $
61+
30 31 32 33 34 35 36 37 38 39
62+
63+
default output

0 commit comments

Comments
 (0)