Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions nixos/doc/manual/release-notes/rl-1909.xml
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,13 @@
<literal>nix-shell -p altcoins.dogecoin</literal>, etc.
</para>
</listitem>
<listitem>
<para>
Ceph has been upgraded to v14.2.1.
See the <link xlink:href="https://ceph.com/releases/v14-2-0-nautilus-released/">release notes</link> for details.
The mgr dashboard and OSDs backed by loop devices are no longer explicitly supported by the package and module.
Note: There have been some issues with python-cherrypy, which is used by the dashboard
and prometheus mgr modules (and possibly others), hence 0000-dont-check-cherrypy-version.patch.
</para>
</listitem>
</itemizedlist>
</section>
</section>
110 changes: 73 additions & 37 deletions nixos/modules/services/network-filesystems/ceph.nix
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@
with lib;

let
ceph = pkgs.ceph;
cfg = config.services.ceph;

# function that translates "camelCaseOptions" to "camel case options", credits to tilpner in #nixos@freenode
translateOption = replaceStrings upperChars (map (s: " ${s}") lowerChars);
generateDaemonList = (daemonType: daemons: extraServiceConfig:
mkMerge (
map (daemon:
{ "ceph-${daemonType}-${daemon}" = generateServiceFile daemonType daemon cfg.global.clusterName ceph extraServiceConfig; }
) daemons
)
);
generateServiceFile = (daemonType: daemonId: clusterName: ceph: extraServiceConfig: {
expandCamelCase = replaceStrings upperChars (map (s: " ${s}") lowerChars);
expandCamelCaseAttrs = mapAttrs' (name: value: nameValuePair (expandCamelCase name) value);

makeServices = (daemonType: daemonIds: extraServiceConfig:
mkMerge (map (daemonId:
{ "ceph-${daemonType}-${daemonId}" = makeService daemonType daemonId cfg.global.clusterName pkgs.ceph extraServiceConfig; })
daemonIds));

makeService = (daemonType: daemonId: clusterName: ceph: extraServiceConfig: {
enable = true;
description = "Ceph ${builtins.replaceStrings lowerChars upperChars daemonType} daemon ${daemonId}";
after = [ "network-online.target" "time-sync.target" ] ++ optional (daemonType == "osd") "ceph-mon.target";
Expand All @@ -34,23 +34,29 @@ let
Restart = "on-failure";
StartLimitBurst = "5";
StartLimitInterval = "30min";
ExecStart = "${ceph.out}/bin/${if daemonType == "rgw" then "radosgw" else "ceph-${daemonType}"} -f --cluster ${clusterName} --id ${if daemonType == "rgw" then "client.${daemonId}" else daemonId} --setuser ceph --setgroup ceph";
ExecStart = ''${ceph.out}/bin/${if daemonType == "rgw" then "radosgw" else "ceph-${daemonType}"} \
-f --cluster ${clusterName} --id ${daemonId} --setuser ceph \
--setgroup ${if daemonType == "osd" then "disk" else "ceph"}'';
} // extraServiceConfig
// optionalAttrs (daemonType == "osd") { ExecStartPre = "${ceph.out}/libexec/ceph/ceph-osd-prestart.sh --id ${daemonId} --cluster ${clusterName}"; };
} // optionalAttrs (builtins.elem daemonType [ "mds" "mon" "rgw" "mgr" ]) { preStart = ''
// optionalAttrs (daemonType == "osd") { ExecStartPre = ''${ceph.lib}/libexec/ceph/ceph-osd-prestart.sh \
--id ${daemonId} --cluster ${clusterName}''; };
} // optionalAttrs (builtins.elem daemonType [ "mds" "mon" "rgw" "mgr" ]) {
preStart = ''
daemonPath="/var/lib/ceph/${if daemonType == "rgw" then "radosgw" else daemonType}/${clusterName}-${daemonId}"
if [ ! -d ''$daemonPath ]; then
mkdir -m 755 -p ''$daemonPath
chown -R ceph:ceph ''$daemonPath
if [ ! -d $daemonPath ]; then
mkdir -m 755 -p $daemonPath
chown -R ceph:ceph $daemonPath
fi
'';
} // optionalAttrs (daemonType == "osd") { path = [ pkgs.getopt ]; }
);
generateTargetFile = (daemonType:

makeTarget = (daemonType:
{
"ceph-${daemonType}" = {
description = "Ceph target allowing to start/stop all ceph-${daemonType} services at once";
partOf = [ "ceph.target" ];
wantedBy = [ "ceph.target" ];
before = [ "ceph.target" ];
};
}
Expand Down Expand Up @@ -82,6 +88,14 @@ in
'';
};

mgrModulePath = mkOption {
type = types.path;
default = "${pkgs.ceph.lib}/lib/ceph/mgr";
description = ''
Path at which to find ceph-mgr modules.
'';
};

monInitialMembers = mkOption {
type = with types; nullOr commas;
default = null;
Expand Down Expand Up @@ -157,6 +171,27 @@ in
A comma-separated list of subnets that will be used as cluster networks in the cluster.
'';
};

rgwMimeTypesFile = mkOption {
type = with types; nullOr path;
default = "${pkgs.mime-types}/etc/mime.types";
description = ''
Path to mime types used by radosgw.
'';
};
};

extraConfig = mkOption {
type = with types; attrsOf str;
default = {};
example = ''
{
"ms bind ipv6" = "true";
};
'';
description = ''
Extra configuration to add to the global section. Use for setting values that are common for all daemons in the cluster.
'';
};

mgr = {
Expand Down Expand Up @@ -216,6 +251,7 @@ in
to the id part in ceph i.e. [ "name1" ] would result in osd.name1
'';
};

extraConfig = mkOption {
type = with types; attrsOf str;
default = {
Expand Down Expand Up @@ -296,9 +332,6 @@ in
{ assertion = cfg.global.fsid != "";
message = "fsid has to be set to a valid uuid for the cluster to function";
}
{ assertion = cfg.mgr.enable == true;
message = "ceph 12.x requires atleast 1 MGR daemon enabled for the cluster to function";
}
{ assertion = cfg.mon.enable == true -> cfg.mon.daemons != [];
message = "have to set id of atleast one MON if you're going to enable Monitor";
}
Expand All @@ -317,14 +350,12 @@ in
''Not setting up a list of members in monInitialMembers requires that you set the host variable for each mon daemon or else the cluster won't function'';

environment.etc."ceph/ceph.conf".text = let
# Translate camelCaseOptions to the expected camel case option for ceph.conf
translatedGlobalConfig = mapAttrs' (name: value: nameValuePair (translateOption name) value) cfg.global;
# Merge the extraConfig set for mgr daemons, as mgr don't have their own section
globalAndMgrConfig = translatedGlobalConfig // optionalAttrs cfg.mgr.enable cfg.mgr.extraConfig;
globalSection = expandCamelCaseAttrs (cfg.global // cfg.extraConfig // optionalAttrs cfg.mgr.enable cfg.mgr.extraConfig);
# Remove all name-value pairs with null values from the attribute set to avoid making empty sections in the ceph.conf
globalConfig = mapAttrs' (name: value: nameValuePair (translateOption name) value) (filterAttrs (name: value: value != null) globalAndMgrConfig);
globalSection' = filterAttrs (name: value: value != null) globalSection;
totalConfig = {
"global" = globalConfig;
"global" = globalSection';
} // optionalAttrs (cfg.mon.enable && cfg.mon.extraConfig != {}) { "mon" = cfg.mon.extraConfig; }
// optionalAttrs (cfg.mds.enable && cfg.mds.extraConfig != {}) { "mds" = cfg.mds.extraConfig; }
// optionalAttrs (cfg.osd.enable && cfg.osd.extraConfig != {}) { "osd" = cfg.osd.extraConfig; }
Expand All @@ -336,31 +367,36 @@ in
name = "ceph";
uid = config.ids.uids.ceph;
description = "Ceph daemon user";
group = "ceph";
extraGroups = [ "disk" ];
};

users.groups = singleton {
name = "ceph";
gid = config.ids.gids.ceph;
};

systemd.services = let
services = []
++ optional cfg.mon.enable (generateDaemonList "mon" cfg.mon.daemons { RestartSec = "10"; })
++ optional cfg.mds.enable (generateDaemonList "mds" cfg.mds.daemons { StartLimitBurst = "3"; })
++ optional cfg.osd.enable (generateDaemonList "osd" cfg.osd.daemons { StartLimitBurst = "30"; RestartSec = "20s"; })
++ optional cfg.rgw.enable (generateDaemonList "rgw" cfg.rgw.daemons { })
++ optional cfg.mgr.enable (generateDaemonList "mgr" cfg.mgr.daemons { StartLimitBurst = "3"; });
++ optional cfg.mon.enable (makeServices "mon" cfg.mon.daemons { RestartSec = "10"; })
++ optional cfg.mds.enable (makeServices "mds" cfg.mds.daemons { StartLimitBurst = "3"; })
++ optional cfg.osd.enable (makeServices "osd" cfg.osd.daemons { StartLimitBurst = "30";
RestartSec = "20s";
PrivateDevices = "no"; # osd needs disk access
})
++ optional cfg.rgw.enable (makeServices "rgw" cfg.rgw.daemons { })
++ optional cfg.mgr.enable (makeServices "mgr" cfg.mgr.daemons { StartLimitBurst = "3"; });
in
mkMerge services;

systemd.targets = let
targets = [
{ "ceph" = { description = "Ceph target allowing to start/stop all ceph service instances at once"; }; }
] ++ optional cfg.mon.enable (generateTargetFile "mon")
++ optional cfg.mds.enable (generateTargetFile "mds")
++ optional cfg.osd.enable (generateTargetFile "osd")
++ optional cfg.rgw.enable (generateTargetFile "rgw")
++ optional cfg.mgr.enable (generateTargetFile "mgr");
{ "ceph" = { description = "Ceph target allowing to start/stop all ceph service instances at once";
wantedBy = [ "multi-user.target" ]; }; }
] ++ optional cfg.mon.enable (makeTarget "mon")
++ optional cfg.mds.enable (makeTarget "mds")
++ optional cfg.osd.enable (makeTarget "osd")
++ optional cfg.rgw.enable (makeTarget "rgw")
++ optional cfg.mgr.enable (makeTarget "mgr");
in
mkMerge targets;

Expand Down
52 changes: 37 additions & 15 deletions nixos/tests/ceph.nix
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import ./make-test.nix ({pkgs, ...}: rec {
import ./make-test.nix ({pkgs, lib, ...}: rec {
name = "All-in-one-basic-ceph-cluster";
meta = with pkgs.stdenv.lib.maintainers; {
maintainers = [ lejonet ];
Expand All @@ -7,6 +7,7 @@ import ./make-test.nix ({pkgs, ...}: rec {
nodes = {
aio = { pkgs, ... }: {
virtualisation = {
memorySize = 1536;
emptyDiskImages = [ 20480 20480 ];
vlans = [ 1 ];
};
Expand All @@ -24,9 +25,6 @@ import ./make-test.nix ({pkgs, ...}: rec {
ceph
xfsprogs
];
nixpkgs.config.packageOverrides = super: {
ceph = super.ceph.override({ nss = super.nss; libxfs = super.libxfs; libaio = super.libaio; jemalloc = super.jemalloc; });
};

boot.kernelModules = [ "xfs" ];

Expand All @@ -51,6 +49,9 @@ import ./make-test.nix ({pkgs, ...}: rec {
enable = true;
daemons = [ "0" "1" ];
};

# So that we don't have to battle systemd when bootstrapping
systemd.targets.ceph.wantedBy = lib.mkForce [];
};
};

Expand All @@ -61,24 +62,26 @@ import ./make-test.nix ({pkgs, ...}: rec {

# Create the ceph-related directories
$aio->mustSucceed(
"mkdir -p /var/lib/ceph/mgr/ceph-aio/",
"mkdir -p /var/lib/ceph/mon/ceph-aio/",
"mkdir -p /var/lib/ceph/osd/ceph-{0..1}/",
"chown ceph:ceph -R /var/lib/ceph/"
"mkdir -p /var/lib/ceph/mgr/ceph-aio",
"mkdir -p /var/lib/ceph/mon/ceph-aio",
"mkdir -p /var/lib/ceph/osd/ceph-{0,1}",
"chown ceph:ceph -R /var/lib/ceph/",
"mkdir -p /etc/ceph",
"chown ceph:ceph -R /etc/ceph"
);

# Bootstrap ceph-mon daemon
$aio->mustSucceed(
"mkdir -p /var/lib/ceph/bootstrap-osd && chown ceph:ceph /var/lib/ceph/bootstrap-osd",
"sudo -u ceph ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'",
"ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'",
"ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring",
"monmaptool --create --add aio 192.168.1.1 --fsid 066ae264-2a5d-4729-8001-6ad265f50b03 /tmp/monmap",
"sudo -u ceph ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'",
"sudo -u ceph ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring",
"monmaptool --create --add aio 192.168.1.1 --fsid 066ae264-2a5d-4729-8001-6ad265f50b03 /tmp/monmap",
"sudo -u ceph ceph-mon --mkfs -i aio --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring",
"touch /var/lib/ceph/mon/ceph-aio/done",
"sudo -u ceph touch /var/lib/ceph/mon/ceph-aio/done",
"systemctl start ceph-mon-aio"
);
$aio->waitForUnit("ceph-mon-aio");
$aio->mustSucceed("ceph mon enable-msgr2");

# Can't check ceph status until a mon is up
$aio->succeed("ceph -s | grep 'mon: 1 daemons'");
Expand All @@ -90,6 +93,7 @@ import ./make-test.nix ({pkgs, ...}: rec {
);
$aio->waitForUnit("ceph-mgr-aio");
$aio->waitUntilSucceeds("ceph -s | grep 'quorum aio'");
$aio->waitUntilSucceeds("ceph -s | grep 'mgr: aio(active,'");

# Bootstrap both OSDs
$aio->mustSucceed(
Expand All @@ -112,8 +116,8 @@ import ./make-test.nix ({pkgs, ...}: rec {
"systemctl start ceph-osd-1"
);

$aio->waitUntilSucceeds("ceph osd stat | grep '2 osds: 2 up, 2 in'");
$aio->waitUntilSucceeds("ceph -s | grep 'mgr: aio(active)'");
$aio->waitUntilSucceeds("ceph osd stat | grep -e '2 osds: 2 up[^,]*, 2 in'");
$aio->waitUntilSucceeds("ceph -s | grep 'mgr: aio(active,'");
$aio->waitUntilSucceeds("ceph -s | grep 'HEALTH_OK'");

$aio->mustSucceed(
Expand All @@ -135,5 +139,23 @@ import ./make-test.nix ({pkgs, ...}: rec {
"ceph osd pool ls | grep 'aio-test'",
"ceph osd pool delete aio-other-test aio-other-test --yes-i-really-really-mean-it"
);

# Although we disable the target in the config, we still want to test that it works as intended
$aio->mustSucceed(
"systemctl stop ceph-osd-0",
"systemctl stop ceph-osd-1",
"systemctl stop ceph-mgr-aio",
"systemctl stop ceph-mon-aio"
);
$aio->succeed("systemctl start ceph.target");
$aio->waitForUnit("ceph-mon-aio");
$aio->waitForUnit("ceph-mgr-aio");
$aio->waitForUnit("ceph-osd-0");
$aio->waitForUnit("ceph-osd-1");
$aio->succeed("ceph -s | grep 'mon: 1 daemons'");
$aio->waitUntilSucceeds("ceph -s | grep 'quorum aio'");
$aio->waitUntilSucceeds("ceph osd stat | grep -e '2 osds: 2 up[^,]*, 2 in'");
$aio->waitUntilSucceeds("ceph -s | grep 'mgr: aio(active,'");
$aio->waitUntilSucceeds("ceph -s | grep 'HEALTH_OK'");
'';
})
70 changes: 70 additions & 0 deletions pkgs/tools/filesystems/ceph/0000-dont-check-cherrypy-version.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
diff --git a/src/pybind/mgr/dashboard/module.py b/src/pybind/mgr/dashboard/module.py
index a8a3ec07c1..bcc9b86c37 100644
--- a/src/pybind/mgr/dashboard/module.py
+++ b/src/pybind/mgr/dashboard/module.py
@@ -25,40 +25,6 @@ except ImportError:

from .services.sso import load_sso_db

-# The SSL code in CherryPy 3.5.0 is buggy. It was fixed long ago,
-# but 3.5.0 is still shipping in major linux distributions
-# (Fedora 27, Ubuntu Xenial), so we must monkey patch it to get SSL working.
-if cherrypy is not None:
- v = StrictVersion(cherrypy.__version__)
- # It was fixed in 3.7.0. Exact lower bound version is probably earlier,
- # but 3.5.0 is what this monkey patch is tested on.
- if StrictVersion("3.5.0") <= v < StrictVersion("3.7.0"):
- from cherrypy.wsgiserver.wsgiserver2 import HTTPConnection,\
- CP_fileobject
-
- def fixed_init(hc_self, server, sock, makefile=CP_fileobject):
- hc_self.server = server
- hc_self.socket = sock
- hc_self.rfile = makefile(sock, "rb", hc_self.rbufsize)
- hc_self.wfile = makefile(sock, "wb", hc_self.wbufsize)
- hc_self.requests_seen = 0
-
- HTTPConnection.__init__ = fixed_init
-
-# When the CherryPy server in 3.2.2 (and later) starts it attempts to verify
-# that the ports its listening on are in fact bound. When using the any address
-# "::" it tries both ipv4 and ipv6, and in some environments (e.g. kubernetes)
-# ipv6 isn't yet configured / supported and CherryPy throws an uncaught
-# exception.
-if cherrypy is not None:
- v = StrictVersion(cherrypy.__version__)
- # the issue was fixed in 3.2.3. it's present in 3.2.2 (current version on
- # centos:7) and back to at least 3.0.0.
- if StrictVersion("3.1.2") <= v < StrictVersion("3.2.3"):
- # https://github.com/cherrypy/cherrypy/issues/1100
- from cherrypy.process import servers
- servers.wait_for_occupied_port = lambda host, port: None
-
if 'COVERAGE_ENABLED' in os.environ:
import coverage
__cov = coverage.Coverage(config_file="{}/.coveragerc".format(os.path.dirname(__file__)),
diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py
index b7fecf8d85..dfd4160591 100644
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -18,20 +18,6 @@ from rbd import RBD
DEFAULT_ADDR = '::'
DEFAULT_PORT = 9283

-# When the CherryPy server in 3.2.2 (and later) starts it attempts to verify
-# that the ports its listening on are in fact bound. When using the any address
-# "::" it tries both ipv4 and ipv6, and in some environments (e.g. kubernetes)
-# ipv6 isn't yet configured / supported and CherryPy throws an uncaught
-# exception.
-if cherrypy is not None:
- v = StrictVersion(cherrypy.__version__)
- # the issue was fixed in 3.2.3. it's present in 3.2.2 (current version on
- # centos:7) and back to at least 3.0.0.
- if StrictVersion("3.1.2") <= v < StrictVersion("3.2.3"):
- # https://github.com/cherrypy/cherrypy/issues/1100
- from cherrypy.process import servers
- servers.wait_for_occupied_port = lambda host, port: None
-
# cherrypy likes to sys.exit on error. don't let it take us down too!
def os_exit_noop(*args, **kwargs):
pass
Loading