diff --git a/lib/src/async_util.rs b/lib/src/async_util.rs index f5d5b7d4..8aed32c3 100644 --- a/lib/src/async_util.rs +++ b/lib/src/async_util.rs @@ -1,6 +1,6 @@ use std::io::prelude::*; use std::pin::Pin; -use tokio::io::{AsyncRead, AsyncReadExt}; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; /// A [`std::io::Read`] implementation backed by an asynchronous source. pub(crate) struct ReadBridge { @@ -26,6 +26,35 @@ impl ReadBridge { } } +/// A [`std::io::Write`] implementation backed by an asynchronous source. +pub(crate) struct WriteBridge { + w: Pin>, + rt: tokio::runtime::Handle, +} + +impl Write for WriteBridge { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + let w = &mut self.w; + self.rt.block_on(async { w.write(buf).await }) + } + + fn flush(&mut self) -> std::io::Result<()> { + let w = &mut self.w; + self.rt.block_on(async { w.flush().await }) + } +} + +impl WriteBridge { + /// Create a [`std::io::Write`] implementation backed by an asynchronous source. + /// + /// This is useful with e.g. [`tokio::task::spawn_blocking`]. + pub(crate) fn new(reader: T) -> Self { + let w = Box::pin(reader); + let rt = tokio::runtime::Handle::current(); + WriteBridge { w, rt } + } +} + #[cfg(test)] mod test { use std::convert::TryInto; diff --git a/lib/src/tar/write.rs b/lib/src/tar/write.rs index 31cd0bdb..b4ae95bd 100644 --- a/lib/src/tar/write.rs +++ b/lib/src/tar/write.rs @@ -7,33 +7,46 @@ //! In the future, this may also evolve into parsing the tar //! stream in Rust, not in C. +use crate::async_util::{ReadBridge, WriteBridge}; use crate::cmdext::CommandRedirectionExt; use crate::Result; use anyhow::{anyhow, Context}; +use camino::{Utf8Component, Utf8Path, Utf8PathBuf}; use ostree::gio; use ostree::prelude::FileExt; +use std::collections::BTreeMap; +use std::convert::TryInto; +use std::io::{BufWriter, Write}; use std::os::unix::prelude::AsRawFd; use std::path::Path; -use tokio::io::AsyncReadExt; +use std::process::Stdio; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite}; use tracing::instrument; /// Configuration for tar layer commits. #[derive(Debug, Default)] -pub struct WriteTarOptions<'a> { +pub struct WriteTarOptions { /// Base ostree commit hash - pub base: Option<&'a str>, + pub base: Option, /// Enable SELinux labeling from the base commit /// Requires the `base` option. pub selinux: bool, } -struct TempSePolicy { - tempdir: tempfile::TempDir, +/// The result of writing a tar stream. +/// +/// This includes some basic data on the number of files that were filtered +/// out because they were not in `/usr`. +pub struct WriteTarResult { + /// The resulting OSTree commit SHA-256. + pub commit: String, + /// Number of paths in a prefix (e.g. `/var` or `/boot`) which were discarded. + pub filtered: BTreeMap, } // Copy of logic from https://github.com/ostreedev/ostree/pull/2447 // to avoid waiting for backport + releases -fn sepolicy_from_base(repo: &ostree::Repo, base: &str) -> Result { +fn sepolicy_from_base(repo: &ostree::Repo, base: &str) -> Result { let cancellable = gio::NONE_CANCELLABLE; let policypath = "usr/etc/selinux"; let tempdir = tempfile::tempdir()?; @@ -49,7 +62,116 @@ fn sepolicy_from_base(repo: &ostree::Repo, base: &str) -> Result { }; repo.checkout_at(Some(&opts), ostree::AT_FDCWD, policydest, base, cancellable)?; } - Ok(TempSePolicy { tempdir: tempdir }) + Ok(tempdir) +} + +#[derive(Debug)] +enum NormalizedPathResult<'a> { + Filtered(&'a str), + Normal(Utf8PathBuf), +} + +fn normalize_validate_path<'a>(path: &'a Utf8Path) -> Result> { + // This converts e.g. `foo//bar/./baz` into `foo/bar/baz`. + let mut components = path + .components() + .map(|part| { + match part { + // Convert absolute paths to relative + camino::Utf8Component::RootDir => Ok(camino::Utf8Component::CurDir), + // Allow ./ and regular parts + camino::Utf8Component::Normal(_) | camino::Utf8Component::CurDir => Ok(part), + // Barf on Windows paths as well as Unix path uplinks `..` + _ => Err(anyhow!("Invalid path: {}", path)), + } + }) + .peekable(); + let mut ret = Utf8PathBuf::new(); + // Insert a leading `./` if not present + if let Some(Ok(camino::Utf8Component::Normal(_))) = components.peek() { + ret.push(camino::Utf8Component::CurDir); + } + let mut found_first = false; + for part in components { + let part = part?; + if !found_first { + if let Utf8Component::Normal(part) = part { + found_first = true; + // Now, rewrite /etc -> /usr/etc, and discard everything not in /usr. + match part { + "usr" => ret.push(part), + "etc" => { + ret.push("usr/etc"); + } + o => return Ok(NormalizedPathResult::Filtered(o)), + } + } else { + ret.push(part); + } + } else { + ret.push(part); + } + } + + Ok(NormalizedPathResult::Normal(ret)) +} + +/// Perform various filtering on imported tar archives. +/// - Move /etc to /usr/etc +/// - Entirely drop files not in /usr +/// +/// This also acts as a Rust "pre-parser" of the tar archive, hopefully +/// catching anything corrupt that might be exploitable from the C libarchive side. +/// Remember that we're parsing this while we're downloading it, and in order +/// to verify integrity we rely on the total sha256 of the blob, so all content +/// written before then must be considered untrusted. +fn filter_tar(src: impl std::io::Read, dest: impl std::io::Write) -> Result> { + let src = std::io::BufReader::new(src); + let mut src = tar::Archive::new(src); + let dest = BufWriter::new(dest); + let mut dest = tar::Builder::new(dest); + let mut filtered = BTreeMap::new(); + + let ents = src.entries()?; + for entry in ents { + let entry = entry?; + let path = entry.path()?; + let path: &Utf8Path = (&*path).try_into()?; + + let normalized = match normalize_validate_path(path)? { + NormalizedPathResult::Filtered(path) => { + if let Some(v) = filtered.get_mut(path) { + *v += 1; + } else { + filtered.insert(path.to_string(), 1); + } + continue; + } + NormalizedPathResult::Normal(path) => path, + }; + + let mut header = entry.header().clone(); + dest.append_data(&mut header, normalized, entry)?; + } + dest.into_inner()?.flush()?; + Ok(filtered) +} + +/// Asynchronous wrapper for filter_tar() +async fn filter_tar_async( + src: impl AsyncRead + Send + 'static, + mut dest: impl AsyncWrite + Send + Unpin, +) -> Result> { + let (tx_buf, mut rx_buf) = tokio::io::duplex(8192); + let tar_transformer = tokio::task::spawn_blocking(move || -> Result<_> { + let src = ReadBridge::new(src); + let dest = WriteBridge::new(tx_buf); + filter_tar(src, dest) + }); + let copier = tokio::io::copy(&mut rx_buf, &mut dest); + let (r, v) = tokio::join!(tar_transformer, copier); + let _v: u64 = v?; + Ok(r??) } /// Write the contents of a tarball as an ostree commit. @@ -57,15 +179,15 @@ fn sepolicy_from_base(repo: &ostree::Repo, base: &str) -> Result { #[instrument(skip(repo, src))] pub async fn write_tar( repo: &ostree::Repo, - mut src: impl tokio::io::AsyncRead + Send + Unpin + 'static, + src: impl tokio::io::AsyncRead + Send + Unpin + 'static, refname: &str, - options: Option>, -) -> Result { - use std::process::Stdio; + options: Option, +) -> Result { + let repo = repo.clone(); let options = options.unwrap_or_default(); let sepolicy = if options.selinux { if let Some(base) = options.base { - Some(sepolicy_from_base(repo, base).context("tar: Preparing sepolicy")?) + Some(sepolicy_from_base(&repo, &base).context("tar: Preparing sepolicy")?) } else { None } @@ -84,12 +206,11 @@ pub async fn write_tar( c.arg("--repo=/proc/self/fd/3"); if let Some(sepolicy) = sepolicy.as_ref() { c.arg("--selinux-policy"); - c.arg(sepolicy.tempdir.path()); + c.arg(sepolicy.path()); } c.args(&[ "--no-bindings", "--tar-autocreate-parents", - r#"--tar-pathname-filter=^etc(.*),usr/etc\1"#, "--tree=tar=/proc/self/fd/0", "--branch", refname, @@ -99,15 +220,11 @@ pub async fn write_tar( c.kill_on_drop(true); let mut r = c.spawn()?; // Safety: We passed piped() for all of these - let mut child_stdin = r.stdin.take().unwrap(); + let child_stdin = r.stdin.take().unwrap(); let mut child_stdout = r.stdout.take().unwrap(); let mut child_stderr = r.stderr.take().unwrap(); - // Copy our input to child stdout - let input_copier = async move { - let _n = tokio::io::copy(&mut src, &mut child_stdin).await?; - drop(child_stdin); - Ok::<_, anyhow::Error>(()) - }; + // Copy the filtered tar stream to child stdin + let filtered_result = filter_tar_async(src, child_stdin); // Gather stdout/stderr to buffers let output_copier = async move { let mut child_stdout_buf = String::new(); @@ -119,7 +236,8 @@ pub async fn write_tar( Ok::<_, anyhow::Error>((child_stdout_buf, child_stderr_buf)) }; - let (_, (child_stdout, child_stderr)) = tokio::try_join!(input_copier, output_copier)?; + let (filtered_result, (child_stdout, child_stderr)) = + tokio::try_join!(filtered_result, output_copier)?; let status = r.wait().await?; // Ensure this lasted until the process exited drop(sepolicy); @@ -132,5 +250,78 @@ pub async fn write_tar( } // TODO: trim string in place let s = child_stdout.trim(); - Ok(s.to_string()) + Ok(WriteTarResult { + commit: s.to_string(), + filtered: filtered_result, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + + #[test] + fn test_normalize_path() { + let valid = &[ + ("/usr/bin/blah", "./usr/bin/blah"), + ("usr/bin/blah", "./usr/bin/blah"), + ("usr///share/.//blah", "./usr/share/blah"), + ("./", "."), + ]; + for &(k, v) in valid { + let r = normalize_validate_path(k.into()).unwrap(); + match r { + NormalizedPathResult::Filtered(o) => { + panic!("Case {} should not be filtered as {}", k, o) + } + NormalizedPathResult::Normal(p) => { + assert_eq!(v, p.as_str()); + } + } + } + let filtered = &[ + ("/boot/vmlinuz", "boot"), + ("var/lib/blah", "var"), + ("./var/lib/blah", "var"), + ]; + for &(k, v) in filtered { + match normalize_validate_path(k.into()).unwrap() { + NormalizedPathResult::Filtered(f) => { + assert_eq!(v, f); + } + NormalizedPathResult::Normal(_) => { + panic!("{} should be filtered", k) + } + } + } + let errs = &["usr/foo/../../bar"]; + for &k in errs { + assert!(normalize_validate_path(k.into()).is_err()); + } + } + + #[tokio::test] + async fn tar_filter() -> Result<()> { + let tempd = tempfile::tempdir()?; + let rootfs = &tempd.path().join("rootfs"); + std::fs::create_dir_all(rootfs.join("etc/systemd/system"))?; + std::fs::write(rootfs.join("etc/systemd/system/foo.service"), "fooservice")?; + std::fs::write(rootfs.join("blah"), "blah")?; + let rootfs_tar_path = &tempd.path().join("rootfs.tar"); + let rootfs_tar = std::fs::File::create(rootfs_tar_path)?; + let mut rootfs_tar = tar::Builder::new(rootfs_tar); + rootfs_tar.append_dir_all(".", rootfs)?; + let _ = rootfs_tar.into_inner()?; + let mut dest = Vec::new(); + let src = tokio::io::BufReader::new(tokio::fs::File::open(rootfs_tar_path).await?); + filter_tar_async(src, &mut dest).await?; + let dest = dest.as_slice(); + let mut final_tar = tar::Archive::new(Cursor::new(dest)); + let destdir = &tempd.path().join("destdir"); + final_tar.unpack(destdir)?; + assert!(destdir.join("usr/etc/systemd/system/foo.service").exists()); + assert!(!destdir.join("blah").exists()); + Ok(()) + } } diff --git a/lib/tests/it/main.rs b/lib/tests/it/main.rs index ec52837b..4f9c8b9d 100644 --- a/lib/tests/it/main.rs +++ b/lib/tests/it/main.rs @@ -263,36 +263,32 @@ async fn test_tar_import_export() -> Result<()> { #[tokio::test] async fn test_tar_write() -> Result<()> { let fixture = Fixture::new()?; - let r = ostree_ext::tar::write_tar(&fixture.destrepo, EXAMPLEOS_V0, "exampleos", None).await?; - // Here, we're importing a raw tarball into an ostree commit; this is a subtly different - // path than what we do above for the flow of "unpack tarball + ostree commit + export tar". - // But, they should be content-identical. - let (commitdata, _) = fixture.destrepo.load_commit(&r)?; - assert_eq!( - EXAMPLEOS_CONTENT_CHECKSUM, - ostree::commit_get_content_checksum(&commitdata) - .unwrap() - .as_str() - ); - // Test translating /etc to /usr/etc let tmpetc = fixture.path.join("tmproot/etc"); - let tmproot = tmpetc.parent().unwrap(); - let tmptar = fixture.path.join("testlayer.tar"); std::fs::create_dir_all(&tmpetc)?; std::fs::write(tmpetc.join("someconfig.conf"), b"")?; + let tmproot = tmpetc.parent().unwrap(); + let tmpvarlib = &tmproot.join("var/lib"); + std::fs::create_dir_all(tmpvarlib)?; + std::fs::write(tmpvarlib.join("foo.log"), "foolog")?; + std::fs::write(tmpvarlib.join("bar.log"), "barlog")?; + std::fs::create_dir_all(tmproot.join("boot"))?; + let tmptar = fixture.path.join("testlayer.tar"); bash!( "tar cf {tmptar} -C {tmproot} .", tmptar = tmptar.as_str(), tmproot = tmproot.as_str() )?; let src = tokio::fs::File::open(&tmptar).await?; - let layer_commit = ostree_ext::tar::write_tar(&fixture.destrepo, src, "layer", None).await?; + let r = ostree_ext::tar::write_tar(&fixture.destrepo, src, "layer", None).await?; bash!( "ostree --repo={repo} ls {layer_commit} /usr/etc/someconfig.conf >/dev/null", repo = fixture.destrepo_path.as_str(), - layer_commit = layer_commit.as_str() + layer_commit = r.commit.as_str() )?; + assert_eq!(r.filtered.len(), 2); + assert_eq!(*r.filtered.get("var").unwrap(), 4); + assert_eq!(*r.filtered.get("boot").unwrap(), 1); Ok(()) }