Skip to content

Commit 3a8dc91

Browse files
committed
Add concept of state overlays
In the OSTree model, executables go in `/usr`, state in `/var` and configuration in `/etc`. Software that lives in `/opt` however messes this up because it often mixes code *and* state, making it harder to manage. More generally, it's sometimes useful to have the OSTree commit contain code under a certain path, but still allow that path to be writable by software and the sysadmin at runtime (`/usr/local` is another instance). Add the concept of state overlays. A state overlay is an overlayfs mount whose upper directory, which contains unmanaged state, is carried forward on top of a lower directory, containing OSTree-managed files. In the example of `/usr/local`, OSTree commits can ship content there, all while allowing users to e.g. add scripts in `/usr/local/bin` when booted into that commit. Some reconciliation logic is executed whenever the base is updated so that newer files in the base are never shadowed by a copied up version in the upper directory. This matches RPM semantics when upgrading packages whose files may have been modified. For ease of integration, this is exposed as a systemd template unit which any downstream distro/user can enable. The instance name is the mountpath in escaped systemd path notation (e.g. `ostree-state-overlay@opt.service`). See discussions in #3113 for more details.
1 parent 234fa2c commit 3a8dc91

File tree

7 files changed

+431
-0
lines changed

7 files changed

+431
-0
lines changed

Makefile-boot.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ systemdsystemunit_DATA = src/boot/ostree-prepare-root.service \
4242
src/boot/ostree-finalize-staged.service \
4343
src/boot/ostree-finalize-staged.path \
4444
src/boot/ostree-finalize-staged-hold.service \
45+
src/boot/ostree-state-overlay@.service \
4546
$(NULL)
4647
systemdtmpfilesdir = $(prefix)/lib/tmpfiles.d
4748
dist_systemdtmpfiles_DATA = src/boot/ostree-tmpfiles.conf
@@ -72,6 +73,7 @@ EXTRA_DIST += src/boot/dracut/module-setup.sh \
7273
src/boot/ostree-remount.service \
7374
src/boot/ostree-finalize-staged.service \
7475
src/boot/ostree-finalize-staged-hold.service \
76+
src/boot/ostree-state-overlay@.service \
7577
src/boot/grub2/grub2-15_ostree \
7678
src/boot/grub2/ostree-grub-generator \
7779
$(NULL)

Makefile-ostree.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ ostree_SOURCES += \
8585
src/ostree/ot-admin-builtin-post-copy.c \
8686
src/ostree/ot-admin-builtin-upgrade.c \
8787
src/ostree/ot-admin-builtin-unlock.c \
88+
src/ostree/ot-admin-builtin-state-overlay.c \
8889
src/ostree/ot-admin-builtins.h \
8990
src/ostree/ot-admin-instutil-builtin-selinux-ensure-labeled.c \
9091
src/ostree/ot-admin-instutil-builtin-set-kargs.c \

src/boot/ostree-state-overlay@.service

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright (C) 2023 Red Hat Inc.
2+
#
3+
# This library is free software; you can redistribute it and/or
4+
# modify it under the terms of the GNU Lesser General Public
5+
# License as published by the Free Software Foundation; either
6+
# version 2 of the License, or (at your option) any later version.
7+
#
8+
# This library is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11+
# Lesser General Public License for more details.
12+
#
13+
# You should have received a copy of the GNU Lesser General Public
14+
# License along with this library. If not, see <https://www.gnu.org/licenses/>.
15+
16+
# Template unit: each instance assembles one state overlay by running
# `ostree admin state-overlay` once at boot.
[Unit]
17+
Description=OSTree State Overlay On /%I
18+
Documentation=man:ostree(1)
19+
DefaultDependencies=no
20+
# Only relevant when the kernel command line carries an `ostree` argument.
ConditionKernelCommandLine=ostree
21+
# Run after /var is set up, since that is where the upperdir is stored,
22+
# and after boot.mount so that the sysroot can be loaded.
23+
After=var.mount boot.mount
24+
# Run before local-fs.target, which we consider ourselves a part of.
25+
Before=local-fs.target
26+
27+
[Service]
28+
# The command performs the mount and exits; RemainAfterExit keeps the unit
# active afterwards.
Type=oneshot
29+
RemainAfterExit=yes
30+
# Arguments are NAME MOUNTPATH: %i is the instance name as given (escaped) and
# /%I is presumably the unescaped mount path (see systemd.unit(5) specifiers).
ExecStart=/usr/bin/ostree admin state-overlay %i /%I
31+
StandardInput=null
32+
StandardOutput=journal
33+
StandardError=journal+console
34+
35+
[Install]
36+
WantedBy=local-fs.target
Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
/* Copyright (C) 2023 Red Hat, Inc.
2+
*
3+
* SPDX-License-Identifier: LGPL-2.0+
4+
*
5+
* This library is free software; you can redistribute it and/or
6+
* modify it under the terms of the GNU Lesser General Public
7+
* License as published by the Free Software Foundation; either
8+
* version 2 of the License, or (at your option) any later version.
9+
*
10+
* This library is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13+
* Lesser General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU Lesser General Public
16+
* License along with this library. If not, see <https://www.gnu.org/licenses/>.
17+
*/
18+
19+
#include "config.h"

#include <fcntl.h>
#include <glib-unix.h>
#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>

#include "glnx-errors.h"
#include "glnx-fdio.h"
#include "glnx-local-alloc.h"
#include "glnx-shutil.h"
#include "glnx-xattrs.h"
#include "ot-admin-builtins.h"
33+
34+
/* Directory under which each overlay gets its own state directory, named
 * after the overlay (see ot_admin_builtin_state_overlay()). */
#define OSTREE_STATEOVERLAYS_DIR "/var/ostree/state-overlays"
35+
/* Subdirectory of an overlay's state directory passed as overlayfs `upperdir=`. */
#define OSTREE_STATEOVERLAY_UPPER_DIR "upper"
36+
/* Subdirectory of an overlay's state directory passed as overlayfs `workdir=`. */
#define OSTREE_STATEOVERLAY_WORK_DIR "work"
37+
38+
/* Extended attribute read by is_opaque_dir(); a value of exactly "y" is
 * treated as marking the directory opaque. See
 * https://www.kernel.org/doc/html/latest/filesystems/overlayfs.html */
39+
#define OVERLAYFS_DIR_XATTR_OPAQUE "trusted.overlay.opaque"
40+
41+
/* This subcommand defines no options of its own; the table holds only the
 * terminating entry. */
static GOptionEntry options[] = { { NULL } };
42+
43+
static gboolean
44+
ensure_overlay_dirs (const char *overlay_dir, int *out_overlay_dfd, GCancellable *cancellable,
45+
GError **error)
46+
{
47+
glnx_autofd int overlay_dfd = -1;
48+
if (!glnx_shutil_mkdir_p_at_open (AT_FDCWD, overlay_dir, 0755, &overlay_dfd, cancellable, error))
49+
return FALSE;
50+
51+
if (!glnx_shutil_mkdir_p_at (overlay_dfd, OSTREE_STATEOVERLAY_WORK_DIR, 0755, cancellable, error))
52+
return FALSE;
53+
if (!glnx_shutil_mkdir_p_at (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, 0755, cancellable,
54+
error))
55+
return FALSE;
56+
57+
*out_overlay_dfd = glnx_steal_fd (&overlay_dfd);
58+
return TRUE;
59+
}
60+
61+
static gboolean
62+
is_opaque_dir (int dfd, const char *dname, gboolean *out_is_opaque, GError **error)
63+
{
64+
/* XXX: this is basically like a `glnx_lgetxattrat_allow_noent()`; upstream it */
65+
66+
char pathbuf[PATH_MAX];
67+
snprintf (pathbuf, sizeof (pathbuf), "/proc/self/fd/%d/%s", dfd, dname);
68+
69+
ssize_t bytes_read, real_size;
70+
if (TEMP_FAILURE_RETRY (bytes_read = lgetxattr (pathbuf, OVERLAYFS_DIR_XATTR_OPAQUE, NULL, 0))
71+
< 0)
72+
{
73+
if (errno != ENODATA)
74+
return glnx_throw_errno_prefix (error, "lgetxattr(%s)", OVERLAYFS_DIR_XATTR_OPAQUE);
75+
*out_is_opaque = FALSE;
76+
return TRUE;
77+
}
78+
79+
g_autofree guint8 *buf = g_malloc (bytes_read);
80+
if (TEMP_FAILURE_RETRY (real_size
81+
= lgetxattr (pathbuf, OVERLAYFS_DIR_XATTR_OPAQUE, buf, bytes_read))
82+
< 0)
83+
return glnx_throw_errno_prefix (error, "lgetxattr(%s)", OVERLAYFS_DIR_XATTR_OPAQUE);
84+
85+
*out_is_opaque = (real_size == 1 && buf[0] == 'y');
86+
return TRUE;
87+
}
88+
89+
static gboolean
90+
prune_upperdir_recurse (int lower_dfd, int upper_dfd, GCancellable *cancellable, GError **error)
91+
{
92+
g_auto (GLnxDirFdIterator) dfd_iter = { 0 };
93+
if (!glnx_dirfd_iterator_init_at (upper_dfd, ".", FALSE, &dfd_iter, error))
94+
return FALSE;
95+
96+
while (TRUE)
97+
{
98+
struct dirent *dent = NULL;
99+
if (!glnx_dirfd_iterator_next_dent_ensure_dtype (&dfd_iter, &dent, cancellable, error))
100+
return FALSE;
101+
if (dent == NULL)
102+
break;
103+
104+
/* do we have an entry of the same name in the lowerdir? */
105+
struct stat stbuf;
106+
if (!glnx_fstatat_allow_noent (lower_dfd, dent->d_name, &stbuf, AT_SYMLINK_NOFOLLOW, error))
107+
return FALSE;
108+
if (errno == ENOENT)
109+
continue; /* state file (i.e. upperdir only); carry on */
110+
111+
/* ok, it shadows; are they both directories? */
112+
if (dent->d_type == DT_DIR && S_ISDIR (stbuf.st_mode))
113+
{
114+
/* is the directory opaque? */
115+
gboolean is_opaque = FALSE;
116+
if (!is_opaque_dir (upper_dfd, dent->d_name, &is_opaque, error))
117+
return FALSE;
118+
119+
if (!is_opaque)
120+
{
121+
/* recurse */
122+
glnx_autofd int lower_subdfd = -1;
123+
if (!glnx_opendirat (lower_dfd, dent->d_name, FALSE, &lower_subdfd, error))
124+
return FALSE;
125+
glnx_autofd int upper_subdfd = -1;
126+
if (!glnx_opendirat (upper_dfd, dent->d_name, FALSE, &upper_subdfd, error))
127+
return FALSE;
128+
if (!prune_upperdir_recurse (lower_subdfd, upper_subdfd, cancellable, error))
129+
return glnx_prefix_error (error, "in %s", dent->d_name);
130+
131+
continue;
132+
}
133+
134+
/* fallthrough; implicitly delete opaque directories */
135+
}
136+
137+
/* any other case, we prune (this also implicitly covers whiteouts and opaque dirs) */
138+
if (dent->d_type == DT_DIR)
139+
{
140+
if (!glnx_shutil_rm_rf_at (upper_dfd, dent->d_name, cancellable, error))
141+
return FALSE;
142+
}
143+
/* just unlinkat(); saves one openat() call */
144+
else if (!glnx_unlinkat (upper_dfd, dent->d_name, 0, error))
145+
return FALSE;
146+
}
147+
148+
return TRUE;
149+
}
150+
151+
static gboolean
152+
prune_upperdir (int sysroot_fd, const char *deployment_path, const char *mountpath, int overlay_dfd,
153+
GCancellable *cancellable, GError **error)
154+
{
155+
glnx_autofd int lower_dfd = -1;
156+
if (!glnx_opendirat (AT_FDCWD, mountpath, FALSE, &lower_dfd, error))
157+
return FALSE;
158+
159+
glnx_autofd int upper_dfd = -1;
160+
if (!glnx_opendirat (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, FALSE, &upper_dfd, error))
161+
return FALSE;
162+
163+
if (!prune_upperdir_recurse (lower_dfd, upper_dfd, cancellable, error))
164+
return FALSE;
165+
166+
/* touch upperdir to mark prune as completed */
167+
if (utimensat (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, NULL, 0) < 0)
168+
return glnx_throw_errno_prefix (error, "futimens(upper)");
169+
170+
return TRUE;
171+
}
172+
173+
static gboolean
174+
mount_overlay (const char *mountpath, const char *name, GError **error)
175+
{
176+
/* we could use /proc/self/... with overlay_dfd to avoid these allocations,
177+
* but this gets stringified into the options field in the mount table, and
178+
* being cryptic is not helpful */
179+
g_autofree char *upperdir
180+
= g_build_filename (OSTREE_STATEOVERLAYS_DIR, name, OSTREE_STATEOVERLAY_UPPER_DIR, NULL);
181+
g_autofree char *workdir
182+
= g_build_filename (OSTREE_STATEOVERLAYS_DIR, name, OSTREE_STATEOVERLAY_WORK_DIR, NULL);
183+
g_autofree char *ovl_options
184+
= g_strdup_printf ("lowerdir=%s,upperdir=%s,workdir=%s", mountpath, upperdir, workdir);
185+
if (mount ("overlay", mountpath, "overlay", MS_SILENT, ovl_options) < 0)
186+
return glnx_throw_errno_prefix (error, "mount(%s)", mountpath);
187+
188+
return TRUE;
189+
}
190+
191+
/* Called by [email protected]. */
192+
gboolean
193+
ot_admin_builtin_state_overlay (int argc, char **argv, OstreeCommandInvocation *invocation,
194+
GCancellable *cancellable, GError **error)
195+
{
196+
g_autoptr (GOptionContext) context = g_option_context_new ("NAME MOUNTPATH");
197+
g_autoptr (OstreeSysroot) sysroot = NULL;
198+
199+
/* First parse the args without loading the sysroot to see what options are
200+
* set. */
201+
if (!ostree_admin_option_context_parse (context, options, &argc, &argv,
202+
OSTREE_ADMIN_BUILTIN_FLAG_NONE, invocation, &sysroot,
203+
cancellable, error))
204+
return FALSE;
205+
206+
if (argc < 3)
207+
return glnx_throw (error, "Missing NAME or MOUNTPATH");
208+
209+
/* Sanity-check */
210+
OstreeDeployment *booted_deployment = ostree_sysroot_get_booted_deployment (sysroot);
211+
if (booted_deployment == NULL)
212+
return glnx_throw (error, "Must be booted into an OSTree deployment");
213+
214+
const char *overlay_name = argv[1];
215+
const char *mountpath = argv[2];
216+
217+
glnx_autofd int overlay_dfd = -1;
218+
g_autofree char *overlay_dir = g_build_filename (OSTREE_STATEOVERLAYS_DIR, overlay_name, NULL);
219+
if (!ensure_overlay_dirs (overlay_dir, &overlay_dfd, cancellable, error))
220+
return FALSE;
221+
222+
struct stat stbuf_upper;
223+
if (!glnx_fstatat (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, &stbuf_upper, 0, error))
224+
return FALSE;
225+
226+
/* We don't use "/" directly here because that may have e.g. an overlay
227+
* slapped on from root.transient or composefs. */
228+
g_autofree char *deployment_path
229+
= ostree_sysroot_get_deployment_dirpath (sysroot, booted_deployment);
230+
struct stat stbuf_lower;
231+
if (!glnx_fstatat (ostree_sysroot_get_fd (sysroot), deployment_path, &stbuf_lower, 0, error))
232+
return FALSE;
233+
234+
if (stbuf_upper.st_mtime < stbuf_lower.st_mtime)
235+
{
236+
/* the lowerdir was updated; prune the upperdir */
237+
if (!prune_upperdir (ostree_sysroot_get_fd (sysroot), deployment_path, mountpath, overlay_dfd,
238+
cancellable, error))
239+
return glnx_prefix_error (error, "Pruning upperdir for %s", overlay_name);
240+
}
241+
242+
return mount_overlay (mountpath, overlay_name, error);
243+
}

src/ostree/ot-admin-builtins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ BUILTINPROTO (upgrade);
5050
BUILTINPROTO (kargs);
5151
BUILTINPROTO (post_copy);
5252
BUILTINPROTO (lock_finalization);
53+
BUILTINPROTO (state_overlay);
5354

5455
#undef BUILTINPROTO
5556

src/ostree/ot-builtin-admin.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ static OstreeCommand admin_subcommands[] = {
4242
"Change the finalization locking state of the staged deployment" },
4343
{ "boot-complete", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN,
4444
ot_admin_builtin_boot_complete, "Internal command to run at boot after an update was applied" },
45+
{ "state-overlay", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN,
46+
ot_admin_builtin_state_overlay, "Internal command to assemble a state overlay" },
4547
{ "init-fs", OSTREE_BUILTIN_FLAG_NO_REPO, ot_admin_builtin_init_fs,
4648
"Initialize a root filesystem" },
4749
{ "instutil", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN, ot_admin_builtin_instutil,

0 commit comments

Comments
 (0)