Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pkgs/development/compilers/cudatoolkit/common.nix
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ backendStdenv.mkDerivation rec {
ucx
xorg.libxshmfence
xorg.libxkbfile
] ++ (lib.optionals (lib.versionAtLeast version "12.1") (map lib.getLib ([
] ++ (lib.optionals (lib.versionAtLeast version "12") (map lib.getLib ([
# Used by `/target-linux-x64/CollectX/clx` and `/target-linux-x64/CollectX/libclx_api.so` for:
# - `libcurl.so.4`
curlMinimal
Expand Down Expand Up @@ -177,7 +177,9 @@ backendStdenv.mkDerivation rec {
"libcom_err.so.2"
];

preFixup = ''
preFixup = if lib.versionOlder version "11" then ''
patchelf $out/targets/*/lib/libnvrtc.so --add-needed libnvrtc-builtins.so
'' else ''
patchelf $out/lib64/libnvrtc.so --add-needed libnvrtc-builtins.so
'';

Expand Down
10 changes: 2 additions & 8 deletions pkgs/development/libraries/jsoncpp/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,13 @@ stdenv.mkDerivation rec {
"-DBUILD_SHARED_LIBS=ON"
"-DBUILD_OBJECT_LIBS=OFF"
"-DJSONCPP_WITH_CMAKE_PACKAGE=ON"
"-DBUILD_STATIC_LIBS=${if enableStatic then "ON" else "OFF"}"
]
# the tests won't compile if secureMemory is used because there are no
# comparison operators and conversion functions between
# std::basic_string<..., Json::SecureAllocator<char>> vs.
# std::basic_string<..., [default allocator]>
++ lib.optional ((stdenv.buildPlatform != stdenv.hostPlatform) || secureMemory) "-DJSONCPP_WITH_TESTS=OFF"
++ lib.optional (!enableStatic) "-DBUILD_STATIC_LIBS=OFF";

# this is fixed and no longer necessary in 1.9.5 but there they use
# memset_s without switching to a different c++ standard in the cmake files
postInstall = lib.optionalString enableStatic ''
(cd $out/lib && ln -sf libjsoncpp_static.a libjsoncpp.a)
'';
++ lib.optional ((stdenv.buildPlatform != stdenv.hostPlatform) || secureMemory) "-DJSONCPP_WITH_TESTS=OFF";

meta = with lib; {
homepage = "https://github.com/open-source-parsers/jsoncpp";
Expand Down
2 changes: 2 additions & 0 deletions pkgs/development/libraries/libevent/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ stdenv.mkDerivation rec {
})
];

configureFlags = lib.optional (!sslSupport) "--disable-openssl";

preConfigure = lib.optionalString (lib.versionAtLeast stdenv.hostPlatform.darwinMinVersion "11") ''
MACOSX_DEPLOYMENT_TARGET=10.16
'';
Expand Down
48 changes: 48 additions & 0 deletions pkgs/development/libraries/tclap/1.4.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# TCLAP, the Templatized C++ Command Line Parser Library, built from a pinned
# commit of the upstream 1.4 branch.
{ lib
, stdenv
, fetchgit
, cmake
, doxygen
, python3
}:
let
  # Pinned commit on the 1.4 branch. It is slightly newer than 1.4.0-rc1:
  # See https://github.com/mirror/tclap/compare/1.4.0-rc1..3feeb7b2499b37d9cb80890cadaf7c905a9a50c6
  rev = "3feeb7b2499b37d9cb80890cadaf7c905a9a50c6"; # 1.4 branch
in
stdenv.mkDerivation {
  pname = "tclap";
  version = "1.4-3feeb7b";

  src = fetchgit {
    inherit rev;
    url = "git://git.code.sf.net/p/tclap/code";
    hash = "sha256-byLianB6Vf+I9ABMmsmuoGU2o5RO9c5sMckWW0F+GDM=";
  };

  nativeBuildInputs = [
    cmake
    doxygen
    python3
  ];

  # Rewrites the install-dir variables used by CMakeLists.txt and the
  # pkg-config template. The '$'{...} spelling stops Nix from treating
  # the CMake variable as an antiquotation; the shell glues the quoted
  # dollar sign back onto the braces.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
    --replace '$'{CMAKE_INSTALL_LIBDIR_ARCHIND} '$'{CMAKE_INSTALL_LIBDIR}
    substituteInPlace packaging/pkgconfig.pc.in \
    --replace '$'{prefix}/@CMAKE_INSTALL_INCLUDEDIR@ @CMAKE_INSTALL_FULL_INCLUDEDIR@
  '';

  # Installing docs is broken in this package+version, so an empty
  # placeholder is created for a file the install step expects.
  preInstall = ''
    touch docs/manual.html
  '';

  doCheck = true;

  meta = {
    description = "Templatized C++ Command Line Parser Library (v1.4)";
    homepage = "https://tclap.sourceforge.net/";
    license = lib.licenses.mit;
    maintainers = lib.teams.deshaw.members;
    platforms = lib.platforms.all;
  };
}
147 changes: 147 additions & 0 deletions pkgs/os-specific/linux/dcgm/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
{ lib
, callPackage
, gcc11Stdenv
, fetchFromGitHub
, addOpenGLRunpath
, catch2
, cmake
, cudaPackages_10_2
, cudaPackages_11_8
, cudaPackages_12
, fmt_9
, git
, jsoncpp
, libevent
, plog
, python3
, symlinkJoin
, tclap_1_4
, yaml-cpp
}:
let
# libevent without OpenSSL, then rebuilt as a static, position-independent
# library. The flags are copied from DCGM's libevent build script.
libevent-nossl = libevent.override { sslSupport = false; };
libevent-nossl-static =
  let
    # The C and C++ compilers receive the very same flag string.
    compilerFlags = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
  in
  libevent-nossl.overrideAttrs (prev: {
    CFLAGS = compilerFlags;
    CXXFLAGS = compilerFlags;
    configureFlags = prev.configureFlags ++ [ "--disable-shared" "--with-pic" ];
  });

# jsoncpp with its `enableStatic` package option switched on.
jsoncpp-static = jsoncpp.override { enableStatic = true; };
Comment on lines 23 to 30
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these build flags necessary? If at all possible, it is preferable to use the dependencies as they are packaged by default.

Copy link
Author

@de11n de11n Jul 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DCGM is very particular about its build. Its entire build system is actually deterministic, but it uses Docker to achieve that. This makes it extremely particular about the exact build flags used for every dependency. These dependencies are built with these specific flags and I ran into build failures in other configurations. While it might be theoretically possible to find a different configuration that succeeds, this one exactly matches what upstream does in its build system so it is the most likely to succeed and have matching behavior.


# DCGM depends on 3 different versions of CUDA at the same time.
# The runtime closure, thankfully, is quite small because most things
# are statically linked.
# Each entry pairs a CUDA major version string with the packages to use for it.
cudaPackageSetByVersion =
  let
    # Nixpkgs cudaPackages_10 doesn't have redist packages broken out, so the
    # monolithic toolkit and its `lib` output are used for CUDA 10.
    cuda10Toolkit = cudaPackages_10_2.cudatoolkit;
  in
  [
    { version = "10"; pkgSet = [ cuda10Toolkit cuda10Toolkit.lib ]; }
    { version = "11"; pkgSet = getCudaPackages cudaPackages_11_8; }
    { version = "12"; pkgSet = getCudaPackages cudaPackages_12; }
  ];

# Given a cudaPackages set `p`, returns the list of redist packages DCGM needs.
# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39
getCudaPackages = p: [
  p.cuda_cccl
  p.cuda_cudart
  p.cuda_nvcc
  p.cuda_nvml_dev
  p.libcublas
  p.libcufft
  p.libcurand
];

# Produces the CMake snippet that appends the include and lib paths for one
# entry of the shape `{ version, pkgSet }`.
mkAppendCudaPaths = { version, pkgSet }:
  let
    # Wraps one package's lib directory in double quotes for CMake.
    quoteLibDir = pkg: "\"${pkg}/lib\"";

    # Library paths are listed per package: the merged tree below breaks the
    # build for some reason when it is used for libraries as well.
    libDirs = lib.concatStringsSep " " (map quoteLibDir pkgSet);

    # The DCGM CMake assumes that the folder containing cuda.h contains all
    # headers, so every package is merged into a single tree. Joining only the
    # include subdirectories would be more convenient, but not all packages
    # have one, and handling that is more effort than it's worth for this
    # temporary, build-time package.
    headerTree = symlinkJoin {
      name = "cuda-combined-${version}";
      paths = pkgSet;
    };
  in
  ''
    list(APPEND Cuda${version}_INCLUDE_PATHS "${headerTree}/include")
    list(APPEND Cuda${version}_LIB_PATHS ${libDirs})
  '';

# NVIDIA Data Center GPU Manager (DCGM).
#
# gcc11 is required by DCGM's very particular build system
# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
in gcc11Stdenv.mkDerivation rec {
  pname = "dcgm";
  version = "3.1.8";

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "DCGM";
    rev = "refs/tags/v${version}";
    hash = "sha256-OXqXkP2ZUNPzafGIgJ0MKa39xB84keVFFYl+JsHgnks=";
  };

  # Add our paths to the CUDA paths so FindCuda.cmake can find them.
  # The generated CMake snippet is handed to the builder as an environment
  # variable and prepended to the upstream FindCuda.cmake in prePatch.
  EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;
  prePatch = ''
    echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake
  '';

  hardeningDisable = [ "all" ];

  # Only tools that run on the build machine belong here.
  nativeBuildInputs = [
    addOpenGLRunpath
    cmake
    git
    python3
  ];

  # Libraries and headers that are compiled or linked into DCGM are
  # host-platform dependencies, so they are listed as buildInputs rather
  # than nativeBuildInputs.
  buildInputs = [
    catch2
    fmt_9
    yaml-cpp

    jsoncpp-static
    jsoncpp-static.dev
    libevent-nossl-static
    libevent-nossl-static.dev
    plog.dev # header-only
    tclap_1_4 # header-only
  ];

  # libcuda.so must be found at runtime because it is supplied by the NVIDIA
  # driver. autoAddOpenGLRunpathHook breaks on the statically linked exes, so
  # the runpath is added by hand to each ELF file that lists libcuda.so among
  # its needed libraries.
  postFixup = ''
    find "$out/bin" "$out/lib" -type f -executable -print0 | while IFS= read -r -d "" f; do
      if isELF "$f" && [[ $(patchelf --print-needed "$f" || true) == *libcuda.so* ]]; then
        addOpenGLRunpath "$f"
      fi
    done
  '';

  # The outputs must not retain references to any of the CUDA packages
  # used at build time.
  disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;

  meta = with lib; {
    # nixpkgs convention: no package name at the start, no trailing period.
    description = "Daemon that allows users to monitor NVIDIA data-center GPUs";
    homepage = "https://developer.nvidia.com/dcgm";
    license = licenses.asl20;
    maintainers = teams.deshaw.members;
    mainProgram = "dcgmi";
    platforms = platforms.linux;
  };
}
66 changes: 66 additions & 0 deletions pkgs/servers/monitoring/prometheus/dcgm-exporter/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Prometheus exporter for NVIDIA GPU metrics, linked against DCGM.
{ lib
, buildGoModule
, fetchFromGitHub
, cudaPackages
, dcgm
, linuxPackages
}:
let
  pname = "dcgm-exporter";
  version = "3.1.8-3.1.5";
in
buildGoModule {
  inherit pname version;

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = pname;
    rev = "refs/tags/${version}";
    hash = "sha256-Jzv3cU3gmGIXV+DV3wV/1zSWwz18s3Jax6JC7WZW7Z4=";
  };

  vendorHash = "sha256-KMCV79kUY1sNYysH0MmB7pVU98r7v+DpLIoYHxyyG4U=";

  # Upgrade to go 1.17 during the vendoring FOD build because it fails otherwise.
  # The adjusted go.mod is stored in the vendor output for the main build.
  overrideModAttrs = _prev: {
    preBuild = ''
      substituteInPlace go.mod --replace 'go 1.16' 'go 1.17'
      go mod tidy
    '';
    postInstall = ''
      cp go.mod "$out/go.mod"
    '';
  };

  # Copy the modified go.mod we got from the vendoring process.
  preBuild = ''
    cp vendor/go.mod go.mod
  '';

  nativeBuildInputs = [
    cudaPackages.autoAddOpenGLRunpathHook
  ];

  buildInputs = [
    dcgm
  ];

  CGO_LDFLAGS = "-ldcgm";

  # gonvml and go-dcgm do not work with ELF BIND_NOW hardening because not all
  # symbols are available on startup.
  hardeningDisable = [ "bindnow" ];

  # Tests try to interact with running DCGM service.
  doCheck = false;

  # Record libnvidia-ml.so as a needed library of the installed binary.
  postFixup = ''
    patchelf --add-needed libnvidia-ml.so "$out/bin/dcgm-exporter"
  '';

  meta = {
    description = "NVIDIA GPU metrics exporter for Prometheus leveraging DCGM";
    homepage = "https://github.com/NVIDIA/dcgm-exporter";
    license = lib.licenses.asl20;
    maintainers = lib.teams.deshaw.members;
    mainProgram = "dcgm-exporter";
    platforms = lib.platforms.linux;
  };
}
9 changes: 8 additions & 1 deletion pkgs/top-level/all-packages.nix
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,8 @@ with pkgs;

dbip-country-lite = callPackage ../data/misc/dbip-country-lite { };

dcgm = callPackage ../os-specific/linux/dcgm { };

dhallDirectoryToNix = callPackage ../build-support/dhall/directory-to-nix.nix { };

dhallPackageToNix = callPackage ../build-support/dhall/package-to-nix.nix { };
Expand Down Expand Up @@ -24858,7 +24860,11 @@ with pkgs;

taskflow = callPackage ../development/libraries/taskflow { };

tclap = callPackage ../development/libraries/tclap { };
tclap = tclap_1_2;

tclap_1_2 = callPackage ../development/libraries/tclap/1.2.nix { };

tclap_1_4 = callPackage ../development/libraries/tclap/1.4.nix { };

tcllib = callPackage ../development/libraries/tcllib { };

Expand Down Expand Up @@ -26687,6 +26693,7 @@ with pkgs;
prometheus-cloudflare-exporter = callPackage ../servers/monitoring/prometheus/cloudflare-exporter.nix { };
prometheus-collectd-exporter = callPackage ../servers/monitoring/prometheus/collectd-exporter.nix { };
prometheus-consul-exporter = callPackage ../servers/monitoring/prometheus/consul-exporter.nix { };
prometheus-dcgm-exporter = callPackage ../servers/monitoring/prometheus/dcgm-exporter { };
prometheus-dnsmasq-exporter = callPackage ../servers/monitoring/prometheus/dnsmasq-exporter.nix { };
prometheus-dovecot-exporter = callPackage ../servers/monitoring/prometheus/dovecot-exporter.nix { };
prometheus-domain-exporter = callPackage ../servers/monitoring/prometheus/domain-exporter.nix { };
Expand Down