diff --git a/crates/kernel_cmdline/src/bytes.rs b/crates/kernel_cmdline/src/bytes.rs new file mode 100644 index 000000000..3d121895a --- /dev/null +++ b/crates/kernel_cmdline/src/bytes.rs @@ -0,0 +1,540 @@ +//! Byte-based kernel command line parsing utilities. +//! +//! This module provides functionality for parsing and working with kernel command line +//! arguments, supporting both key-only switches and key-value pairs with proper quote handling. + +use std::borrow::Cow; + +use crate::utf8; + +use anyhow::Result; + +/// A parsed kernel command line. +/// +/// Wraps the raw command line bytes and provides methods for parsing and iterating +/// over individual parameters. Uses copy-on-write semantics to avoid unnecessary +/// allocations when working with borrowed data. +#[derive(Debug)] +pub struct Cmdline<'a>(Cow<'a, [u8]>); + +impl<'a, T: AsRef<[u8]> + ?Sized> From<&'a T> for Cmdline<'a> { + /// Creates a new `Cmdline` from any type that can be referenced as bytes. + /// + /// Uses borrowed data when possible to avoid unnecessary allocations. + fn from(input: &'a T) -> Self { + Self(Cow::Borrowed(input.as_ref())) + } +} + +impl<'a> From> for Cmdline<'a> { + /// Creates a new `Cmdline` from an owned `Vec`. + fn from(input: Vec) -> Self { + Self(Cow::Owned(input)) + } +} + +/// An iterator over kernel command line parameters. +/// +/// This is created by the `iter` method on `Cmdline`. +#[derive(Debug)] +pub struct CmdlineIter<'a>(&'a [u8]); + +impl<'a> Iterator for CmdlineIter<'a> { + type Item = Parameter<'a>; + + fn next(&mut self) -> Option { + let (param, rest) = Parameter::parse(self.0); + self.0 = rest; + param + } +} + +impl<'a> Cmdline<'a> { + /// Reads the kernel command line from `/proc/cmdline`. + /// + /// Returns an error if the file cannot be read or if there are I/O issues. + pub fn from_proc() -> Result { + Ok(Self(Cow::Owned(std::fs::read("/proc/cmdline")?))) + } + + /// Returns an iterator over all parameters in the command line. + /// + /// Properly handles quoted values containing whitespace and splits on + /// unquoted whitespace characters. Parameters are parsed as either + /// key-only switches or key=value pairs. + pub fn iter(&'a self) -> CmdlineIter<'a> { + CmdlineIter(&self.0) + } + + /// Returns an iterator over all parameters in the command line + /// which are valid UTF-8. + pub fn iter_utf8(&'a self) -> impl Iterator> { + self.iter() + .filter_map(|p| utf8::Parameter::try_from(p).ok()) + } + + /// Locate a kernel argument with the given key name. + /// + /// Returns the first parameter matching the given key, or `None` if not found. + /// Key comparison treats dashes and underscores as equivalent. + pub fn find(&'a self, key: impl AsRef<[u8]>) -> Option> { + let key = ParameterKey(key.as_ref()); + self.iter().find(|p| p.key == key) + } + + /// Locate a kernel argument with the given key name. + /// + /// Returns an error if a parameter with the given key name is + /// found, but the value is not valid UTF-8. + /// + /// Otherwise, returns the first parameter matching the given key, + /// or `None` if not found. Key comparison treats dashes and + /// underscores as equivalent. + pub fn find_utf8(&'a self, key: impl AsRef) -> Result>> { + let bytes = match self.find(key.as_ref()) { + Some(p) => p, + None => return Ok(None), + }; + + Ok(Some(utf8::Parameter::try_from(bytes)?)) + } + + /// Find all kernel arguments starting with the given prefix. + /// + /// This is a variant of [`Self::find`]. + pub fn find_all_starting_with + ?Sized>( + &'a self, + prefix: &'a T, + ) -> impl Iterator> + 'a { + self.iter() + .filter(move |p| p.key.0.starts_with(prefix.as_ref())) + } + + /// Locate the value of the kernel argument with the given key name. + /// + /// Returns the first value matching the given key, or `None` if not found. + /// Key comparison treats dashes and underscores as equivalent. + pub fn value_of(&'a self, key: impl AsRef<[u8]>) -> Option<&'a [u8]> { + self.find(key).and_then(|p| p.value) + } + + /// Find the value of the kernel argument with the provided name, which must be present. + /// + /// Otherwise the same as [`Self::value_of`]. + pub fn require_value_of(&'a self, key: impl AsRef<[u8]>) -> Result<&'a [u8]> { + let key = key.as_ref(); + self.value_of(key).ok_or_else(|| { + let key = String::from_utf8_lossy(key); + anyhow::anyhow!("Failed to find kernel argument '{key}'") + }) + } +} + +/// A single kernel command line parameter key +/// +/// Handles quoted values and treats dashes and underscores in keys as equivalent. +#[derive(Clone, Debug, Eq)] +pub struct ParameterKey<'a>(pub(crate) &'a [u8]); + +impl<'a> std::ops::Deref for ParameterKey<'a> { + type Target = [u8]; + + fn deref(&self) -> &'a Self::Target { + self.0 + } +} + +impl PartialEq for ParameterKey<'_> { + /// Compares two parameter keys for equality. + /// + /// Keys are compared with dashes and underscores treated as equivalent. + /// This comparison is case-sensitive. + fn eq(&self, other: &Self) -> bool { + let dedashed = |&c: &u8| { + if c == b'-' { + b'_' + } else { + c + } + }; + + // We can't just zip() because leading substrings will match + // + // For example, "foo" == "foobar" since the zipped iterator + // only compares the first three chars. + let our_iter = self.0.iter().map(dedashed); + let other_iter = other.0.iter().map(dedashed); + our_iter.eq(other_iter) + } +} + +/// A single kernel command line parameter. +#[derive(Debug, Eq)] +pub struct Parameter<'a> { + /// The parameter key as raw bytes + key: ParameterKey<'a>, + /// The parameter value as raw bytes, if present + value: Option<&'a [u8]>, +} + +impl<'a> Parameter<'a> { + /// Attempt to parse a single command line parameter from a slice + /// of bytes. + /// + /// The first tuple item contains the parsed parameter, or `None` + /// if a Parameter could not be constructed from the input. This + /// occurs when the input is either empty or contains only + /// whitespace. + /// + /// Any remaining bytes not consumed from the input are returned + /// as the second tuple item. + pub fn parse(input: &'a [u8]) -> (Option, &'a [u8]) { + let input = input.trim_ascii_start(); + + if input.is_empty() { + return (None, input); + } + + let mut in_quotes = false; + let end = input.iter().position(move |c| { + if *c == b'"' { + in_quotes = !in_quotes; + } + !in_quotes && c.is_ascii_whitespace() + }); + + let end = match end { + Some(end) => end, + None => input.len(), + }; + + let (input, rest) = input.split_at(end); + + let equals = input.iter().position(|b| *b == b'='); + + let ret = match equals { + None => Self { + key: ParameterKey(input), + value: None, + }, + Some(i) => { + let (key, mut value) = input.split_at(i); + let key = ParameterKey(key); + + // skip `=`, we know it's the first byte because we + // found it above + value = &value[1..]; + + // *Only* the first and last double quotes are stripped + value = { + let v = value.strip_prefix(b"\"").unwrap_or(value); + v.strip_suffix(b"\"").unwrap_or(v) + }; + + Self { + key, + value: Some(value), + } + } + }; + + (Some(ret), rest) + } + + /// Returns the key part of the parameter + pub fn key(&self) -> ParameterKey<'a> { + self.key.clone() + } + + /// Returns the optional value part of the parameter + pub fn value(&self) -> Option<&'a [u8]> { + self.value + } +} + +impl<'a> PartialEq for Parameter<'a> { + fn eq(&self, other: &Self) -> bool { + // Note we don't compare parameter because we want hyphen-dash insensitivity for the key + self.key == other.key && self.value == other.value + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // convenience methods for tests + fn param(s: &str) -> Parameter<'_> { + Parameter::parse(s.as_bytes()).0.unwrap() + } + + fn param_utf8(s: &str) -> utf8::Parameter<'_> { + utf8::Parameter::parse(s).0.unwrap() + } + + #[test] + fn test_parameter_parse() { + let (p, rest) = Parameter::parse(b"foo"); + let p = p.unwrap(); + assert_eq!(p.key.0, b"foo"); + assert_eq!(p.value, None); + assert_eq!(rest, "".as_bytes()); + + // should consume one parameter and return the rest of the input + let (p, rest) = Parameter::parse(b"foo=bar baz"); + let p = p.unwrap(); + assert_eq!(p.key.0, b"foo"); + assert_eq!(p.value, Some(b"bar".as_slice())); + assert_eq!(rest, " baz".as_bytes()); + + // should return None on empty or whitespace inputs + let (p, rest) = Parameter::parse(b""); + assert!(p.is_none()); + assert_eq!(rest, b"".as_slice()); + let (p, rest) = Parameter::parse(b" "); + assert!(p.is_none()); + assert_eq!(rest, b"".as_slice()); + } + + #[test] + fn test_parameter_simple() { + let switch = param("foo"); + assert_eq!(switch.key.0, b"foo"); + assert_eq!(switch.value, None); + + let kv = param("bar=baz"); + assert_eq!(kv.key.0, b"bar"); + assert_eq!(kv.value, Some(b"baz".as_slice())); + } + + #[test] + fn test_parameter_quoted() { + let p = param("foo=\"quoted value\""); + assert_eq!(p.value, Some(b"quoted value".as_slice())); + + let p = param("foo=\"unclosed quotes"); + assert_eq!(p.value, Some(b"unclosed quotes".as_slice())); + + let p = param("foo=trailing_quotes\""); + assert_eq!(p.value, Some(b"trailing_quotes".as_slice())); + } + + #[test] + fn test_parameter_extra_whitespace() { + let p = param(" foo=bar "); + assert_eq!(p.key.0, b"foo"); + assert_eq!(p.value, Some(b"bar".as_slice())); + } + + #[test] + fn test_parameter_internal_key_whitespace() { + let (p, rest) = Parameter::parse("foo bar=baz".as_bytes()); + let p = p.unwrap(); + assert_eq!(p.key.0, b"foo"); + assert_eq!(p.value, None); + assert_eq!(rest, b" bar=baz"); + } + + #[test] + fn test_parameter_pathological() { + // valid things that certified insane people would do + + // quotes don't get removed from keys + let p = param("\"\"\""); + assert_eq!(p.key.0, b"\"\"\""); + + // quotes only get stripped from the absolute ends of values + let p = param("foo=\"internal\"quotes\"are\"ok\""); + assert_eq!(p.value, Some(b"internal\"quotes\"are\"ok".as_slice())); + + // non-UTF8 things are in fact valid + let non_utf8_byte = b"\xff"; + #[allow(invalid_from_utf8)] + let failed_conversion = str::from_utf8(non_utf8_byte); + assert!(failed_conversion.is_err()); + let mut p = b"foo=".to_vec(); + p.push(non_utf8_byte[0]); + let (p, _rest) = Parameter::parse(&p); + let p = p.unwrap(); + assert_eq!(p.value, Some(non_utf8_byte.as_slice())); + } + + #[test] + fn test_parameter_equality() { + // substrings are not equal + let foo = param("foo"); + let bar = param("foobar"); + assert_ne!(foo, bar); + assert_ne!(bar, foo); + + // dashes and underscores are treated equally + let dashes = param("a-delimited-param"); + let underscores = param("a_delimited_param"); + assert_eq!(dashes, underscores); + + // same key, same values is equal + let dashes = param("a-delimited-param=same_values"); + let underscores = param("a_delimited_param=same_values"); + assert_eq!(dashes, underscores); + + // same key, different values is not equal + let dashes = param("a-delimited-param=different_values"); + let underscores = param("a_delimited_param=DiFfErEnT_valUEZ"); + assert_ne!(dashes, underscores); + + // mixed variants are never equal + let switch = param("same_key"); + let keyvalue = param("same_key=but_with_a_value"); + assert_ne!(switch, keyvalue); + } + + #[test] + fn test_kargs_simple() { + // example taken lovingly from: + // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/params.c?id=89748acdf226fd1a8775ff6fa2703f8412b286c8#n160 + let kargs = Cmdline::from(b"foo=bar,bar2 baz=fuz wiz".as_slice()); + let mut iter = kargs.iter(); + + assert_eq!(iter.next(), Some(param("foo=bar,bar2"))); + assert_eq!(iter.next(), Some(param("baz=fuz"))); + assert_eq!(iter.next(), Some(param("wiz"))); + assert_eq!(iter.next(), None); + + // Test the find API + assert_eq!(kargs.find("foo").unwrap().value.unwrap(), b"bar,bar2"); + assert!(kargs.find("nothing").is_none()); + } + + #[test] + fn test_kargs_iter_utf8() { + let kargs = Cmdline::from(b"foo=bar,bar2 \xff baz=fuz bad=oh\xffno wiz"); + let mut iter = kargs.iter_utf8(); + + assert_eq!(iter.next(), Some(param_utf8("foo=bar,bar2"))); + assert_eq!(iter.next(), Some(param_utf8("baz=fuz"))); + assert_eq!(iter.next(), Some(param_utf8("wiz"))); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_kargs_find_utf8() { + let kargs = Cmdline::from(b"foo=bar,bar2 \xff baz=fuz bad=oh\xffno wiz"); + + // found it + assert_eq!( + kargs.find_utf8("foo").unwrap().unwrap().value().unwrap(), + "bar,bar2" + ); + + // didn't find it + assert!(kargs.find_utf8("nothing").unwrap().is_none()); + + // found it but key is invalid + let p = kargs.find_utf8("bad"); + assert_eq!( + p.unwrap_err().to_string(), + "Parameter value is not valid UTF-8" + ); + } + + #[test] + fn test_kargs_from_proc() { + let kargs = Cmdline::from_proc().unwrap(); + + // Not really a good way to test this other than assume + // there's at least one argument in /proc/cmdline wherever the + // tests are running + assert!(kargs.iter().count() > 0); + } + + #[test] + fn test_kargs_find_dash_hyphen() { + let kargs = Cmdline::from(b"a-b=1 a_b=2".as_slice()); + // find should find the first one, which is a-b=1 + let p = kargs.find("a_b").unwrap(); + assert_eq!(p.key.0, b"a-b"); + assert_eq!(p.value.unwrap(), b"1"); + let p = kargs.find("a-b").unwrap(); + assert_eq!(p.key.0, b"a-b"); + assert_eq!(p.value.unwrap(), b"1"); + + let kargs = Cmdline::from(b"a_b=2 a-b=1".as_slice()); + // find should find the first one, which is a_b=2 + let p = kargs.find("a_b").unwrap(); + assert_eq!(p.key.0, b"a_b"); + assert_eq!(p.value.unwrap(), b"2"); + let p = kargs.find("a-b").unwrap(); + assert_eq!(p.key.0, b"a_b"); + assert_eq!(p.value.unwrap(), b"2"); + } + + #[test] + fn test_kargs_extra_whitespace() { + let kargs = Cmdline::from(b" foo=bar baz=fuz wiz ".as_slice()); + let mut iter = kargs.iter(); + + assert_eq!(iter.next(), Some(param("foo=bar"))); + assert_eq!(iter.next(), Some(param("baz=fuz"))); + assert_eq!(iter.next(), Some(param("wiz"))); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_value_of() { + let kargs = Cmdline::from(b"foo=bar baz=qux switch".as_slice()); + + // Test existing key with value + assert_eq!(kargs.value_of("foo"), Some(b"bar".as_slice())); + assert_eq!(kargs.value_of("baz"), Some(b"qux".as_slice())); + + // Test key without value + assert_eq!(kargs.value_of("switch"), None); + + // Test non-existent key + assert_eq!(kargs.value_of("missing"), None); + + // Test dash/underscore equivalence + let kargs = Cmdline::from(b"dash-key=value1 under_key=value2".as_slice()); + assert_eq!(kargs.value_of("dash_key"), Some(b"value1".as_slice())); + assert_eq!(kargs.value_of("under-key"), Some(b"value2".as_slice())); + } + + #[test] + fn test_require_value_of() { + let kargs = Cmdline::from(b"foo=bar baz=qux switch".as_slice()); + + // Test existing key with value + assert_eq!(kargs.require_value_of("foo").unwrap(), b"bar"); + assert_eq!(kargs.require_value_of("baz").unwrap(), b"qux"); + + // Test key without value should fail + let err = kargs.require_value_of("switch").unwrap_err(); + assert!(err + .to_string() + .contains("Failed to find kernel argument 'switch'")); + + // Test non-existent key should fail + let err = kargs.require_value_of("missing").unwrap_err(); + assert!(err + .to_string() + .contains("Failed to find kernel argument 'missing'")); + + // Test dash/underscore equivalence + let kargs = Cmdline::from(b"dash-key=value1 under_key=value2".as_slice()); + assert_eq!(kargs.require_value_of("dash_key").unwrap(), b"value1"); + assert_eq!(kargs.require_value_of("under-key").unwrap(), b"value2"); + } + + #[test] + fn test_find_all() { + let kargs = + Cmdline::from(b"foo=bar rd.foo=a rd.bar=b rd.baz rd.qux=c notrd.val=d".as_slice()); + let mut rd_args: Vec<_> = kargs.find_all_starting_with(b"rd.".as_slice()).collect(); + rd_args.sort_by(|a, b| a.key.0.cmp(b.key.0)); + assert_eq!(rd_args.len(), 4); + assert_eq!(rd_args[0], param("rd.bar=b")); + assert_eq!(rd_args[1], param("rd.baz")); + assert_eq!(rd_args[2], param("rd.foo=a")); + assert_eq!(rd_args[3], param("rd.qux=c")); + } +} diff --git a/crates/kernel_cmdline/src/lib.rs b/crates/kernel_cmdline/src/lib.rs index f304533eb..e984f0336 100644 --- a/crates/kernel_cmdline/src/lib.rs +++ b/crates/kernel_cmdline/src/lib.rs @@ -2,670 +2,13 @@ //! //! This module provides functionality for parsing and working with kernel command line //! arguments, supporting both key-only switches and key-value pairs with proper quote handling. +//! +//! The kernel command line is not required to be UTF-8. The `bytes` +//! module works on arbitrary byte data and attempts to parse the +//! command line in the same manner as the kernel itself. +//! +//! The `utf8` module performs the same functionality, but requires +//! all data to be valid UTF-8. -use std::borrow::Cow; - -use anyhow::Result; - -/// This is used by dracut. -pub const INITRD_ARG_PREFIX: &str = "rd."; -/// The kernel argument for configuring the rootfs flags. -pub const ROOTFLAGS: &str = "rootflags"; - -/// A parsed kernel command line. -/// -/// Wraps the raw command line bytes and provides methods for parsing and iterating -/// over individual parameters. Uses copy-on-write semantics to avoid unnecessary -/// allocations when working with borrowed data. -#[derive(Debug)] -pub struct Cmdline<'a>(Cow<'a, [u8]>); - -impl<'a, T: AsRef<[u8]> + ?Sized> From<&'a T> for Cmdline<'a> { - /// Creates a new `Cmdline` from any type that can be referenced as bytes. - /// - /// Uses borrowed data when possible to avoid unnecessary allocations. - fn from(input: &'a T) -> Self { - Self(Cow::Borrowed(input.as_ref())) - } -} - -/// An iterator over kernel command line parameters. -/// -/// This is created by the `iter` method on `Cmdline`. -#[derive(Debug)] -pub struct CmdlineIter<'a>(&'a [u8]); - -impl<'a> Iterator for CmdlineIter<'a> { - type Item = Parameter<'a>; - - fn next(&mut self) -> Option { - let (param, rest) = Parameter::parse(self.0); - self.0 = rest; - param - } -} - -impl<'a> Cmdline<'a> { - /// Reads the kernel command line from `/proc/cmdline`. - /// - /// Returns an error if the file cannot be read or if there are I/O issues. - pub fn from_proc() -> Result { - Ok(Self(Cow::Owned(std::fs::read("/proc/cmdline")?))) - } - - /// Returns an iterator over all parameters in the command line. - /// - /// Properly handles quoted values containing whitespace and splits on - /// unquoted whitespace characters. Parameters are parsed as either - /// key-only switches or key=value pairs. - pub fn iter(&'a self) -> CmdlineIter<'a> { - CmdlineIter(&self.0) - } - - /// Locate a kernel argument with the given key name. - /// - /// Returns the first parameter matching the given key, or `None` if not found. - /// Key comparison treats dashes and underscores as equivalent. - pub fn find(&'a self, key: impl AsRef<[u8]>) -> Option> { - let key = ParameterKey(key.as_ref()); - self.iter().find(|p| p.key == key) - } - - /// Locate a kernel argument with the given key name that must be UTF-8. - /// - /// Otherwise the same as [`Self::find`]. - pub fn find_str(&'a self, key: &str) -> Option> { - let key = ParameterKeyStr(key); - self.iter() - .filter_map(|p| p.to_str()) - .find(move |p| p.key == key) - } - - /// Find all kernel arguments starting with the given prefix which must be UTF-8. - /// Non-UTF8 values are ignored. - /// - /// This is a variant of [`Self::find`]. - pub fn find_all_starting_with_str( - &'a self, - prefix: &'a str, - ) -> impl Iterator> + 'a { - self.iter() - .filter_map(|p| p.to_str()) - .filter(move |p| p.key.0.starts_with(prefix)) - } - - /// Locate the value of the kernel argument with the given key name. - /// - /// Returns the first value matching the given key, or `None` if not found. - /// Key comparison treats dashes and underscores as equivalent. - pub fn value_of(&'a self, key: impl AsRef<[u8]>) -> Option<&'a [u8]> { - self.find(key).and_then(|p| p.value) - } - - /// Locate the UTF-8 value of the kernel argument with the given key name. - /// - /// Returns the first value matching the given key, or `None` if not found. - /// Key comparison treats dashes and underscores as equivalent. - pub fn value_of_utf8(&'a self, key: &str) -> Result, std::str::Utf8Error> { - self.value_of(key).map(std::str::from_utf8).transpose() - } - - /// Find the value of the kernel argument with the provided name, which must be present. - /// - /// Otherwise the same as [`Self::value_of`]. - pub fn require_value_of(&'a self, key: impl AsRef<[u8]>) -> Result<&'a [u8]> { - let key = key.as_ref(); - self.value_of(key).ok_or_else(|| { - let key = String::from_utf8_lossy(key); - anyhow::anyhow!("Failed to find kernel argument '{key}'") - }) - } - - /// Find the UTF-8 value of the kernel argument with the provided - /// name, which must be present. - /// - /// Otherwise the same as [`Self::value_of_utf8`]. - pub fn require_value_of_utf8(&'a self, key: &str) -> Result<&'a str> { - self.value_of_utf8(key)? - .ok_or_else(|| anyhow::anyhow!("Failed to find kernel argument '{key}'")) - } -} - -/// A single kernel command line parameter key -/// -/// Handles quoted values and treats dashes and underscores in keys as equivalent. -#[derive(Debug, Eq)] -pub struct ParameterKey<'a>(&'a [u8]); - -impl<'a> std::ops::Deref for ParameterKey<'a> { - type Target = [u8]; - - fn deref(&self) -> &'a Self::Target { - self.0 - } -} - -/// A single kernel command line parameter key that is known to be UTF-8. -/// -/// Otherwise the same as [`ParameterKey`]. -#[derive(Debug, Eq)] -pub struct ParameterKeyStr<'a>(&'a str); - -/// A single kernel command line parameter. -#[derive(Debug, Eq)] -pub struct Parameter<'a> { - /// The full original value - pub parameter: &'a [u8], - /// The parameter key as raw bytes - pub key: ParameterKey<'a>, - /// The parameter value as raw bytes, if present - pub value: Option<&'a [u8]>, -} - -/// A single kernel command line parameter. -#[derive(Debug, PartialEq, Eq)] -pub struct ParameterStr<'a> { - /// The original value - pub parameter: &'a str, - /// The parameter key - pub key: ParameterKeyStr<'a>, - /// The parameter value, if present - pub value: Option<&'a str>, -} - -impl<'a> Parameter<'a> { - /// Attempt to parse a single command line parameter from a slice - /// of bytes. - /// - /// The first tuple item contains the parsed parameter, or `None` - /// if a Parameter could not be constructed from the input. This - /// occurs when the input is either empty or contains only - /// whitespace. - /// - /// Any remaining bytes not consumed from the input are returned - /// as the second tuple item. - pub fn parse(input: &'a [u8]) -> (Option, &'a [u8]) { - let input = input.trim_ascii_start(); - - if input.is_empty() { - return (None, input); - } - - let mut in_quotes = false; - let end = input.iter().position(move |c| { - if *c == b'"' { - in_quotes = !in_quotes; - } - !in_quotes && c.is_ascii_whitespace() - }); - - let end = match end { - Some(end) => end, - None => input.len(), - }; - - let (input, rest) = input.split_at(end); - - let equals = input.iter().position(|b| *b == b'='); - - let ret = match equals { - None => Self { - parameter: input, - key: ParameterKey(input), - value: None, - }, - Some(i) => { - let (key, mut value) = input.split_at(i); - let key = ParameterKey(key); - - // skip `=`, we know it's the first byte because we - // found it above - value = &value[1..]; - - // *Only* the first and last double quotes are stripped - value = value - .strip_prefix(b"\"") - .unwrap_or(value) - .strip_suffix(b"\"") - .unwrap_or(value); - - Self { - parameter: input, - key, - value: Some(value), - } - } - }; - - (Some(ret), rest) - } - - /// Convert this parameter to a UTF-8 string parameter, if possible. - /// - /// Returns `None` if the parameter contains invalid UTF-8. - pub fn to_str(&self) -> Option> { - let Ok(parameter) = std::str::from_utf8(self.parameter) else { - return None; - }; - Some(ParameterStr::from(parameter)) - } -} - -impl<'a> AsRef for ParameterStr<'a> { - fn as_ref(&self) -> &str { - self.parameter - } -} - -impl PartialEq for ParameterKey<'_> { - /// Compares two parameter keys for equality. - /// - /// Keys are compared with dashes and underscores treated as equivalent. - /// This comparison is case-sensitive. - fn eq(&self, other: &Self) -> bool { - let dedashed = |&c: &u8| { - if c == b'-' { - b'_' - } else { - c - } - }; - - // We can't just zip() because leading substrings will match - // - // For example, "foo" == "foobar" since the zipped iterator - // only compares the first three chars. - let our_iter = self.0.iter().map(dedashed); - let other_iter = other.0.iter().map(dedashed); - our_iter.eq(other_iter) - } -} - -impl<'a> ParameterStr<'a> { - /// This intentionally does not implement From<&'a str> to prevent - /// external users from constructing instances of `ParameterStr`. - /// We rely on the fact that it is derived from `Parameter` to - /// ensure that the input has already been split properly and that - /// the input contains only one command line parameter. - fn from(parameter: &'a str) -> Self { - let (key, value) = if let Some((key, value)) = parameter.split_once('=') { - let value = value - .strip_prefix('"') - .unwrap_or(value) - .strip_suffix('"') - .unwrap_or(value); - (key, Some(value)) - } else { - (parameter, None) - }; - let key = ParameterKeyStr(key); - ParameterStr { - parameter, - key, - value, - } - } -} - -impl<'a> PartialEq for Parameter<'a> { - fn eq(&self, other: &Self) -> bool { - // Note we don't compare parameter because we want hyphen-dash insensitivity for the key - self.key == other.key && self.value == other.value - } -} - -impl<'a> PartialEq for ParameterKeyStr<'a> { - fn eq(&self, other: &Self) -> bool { - ParameterKey(self.0.as_bytes()) == ParameterKey(other.0.as_bytes()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - // convenience method for tests - fn param(s: &str) -> Parameter<'_> { - Parameter::parse(s.as_bytes()).0.unwrap() - } - - #[test] - fn test_parameter_parse() { - let (p, rest) = Parameter::parse(b"foo"); - let p = p.unwrap(); - assert_eq!(p.key.0, b"foo"); - assert_eq!(p.value, None); - assert_eq!(rest, "".as_bytes()); - - // should consume one parameter and return the rest of the input - let (p, rest) = Parameter::parse(b"foo=bar baz"); - let p = p.unwrap(); - assert_eq!(p.key.0, b"foo"); - assert_eq!(p.value, Some(b"bar".as_slice())); - assert_eq!(rest, " baz".as_bytes()); - - // should return None on empty or whitespace inputs - let (p, rest) = Parameter::parse(b""); - assert!(p.is_none()); - assert_eq!(rest, b"".as_slice()); - let (p, rest) = Parameter::parse(b" "); - assert!(p.is_none()); - assert_eq!(rest, b"".as_slice()); - } - - #[test] - fn test_parameter_simple() { - let switch = param("foo"); - assert_eq!(switch.key.0, b"foo"); - assert_eq!(switch.value, None); - - let kv = param("bar=baz"); - assert_eq!(kv.key.0, b"bar"); - assert_eq!(kv.value, Some(b"baz".as_slice())); - } - - #[test] - fn test_parameter_quoted() { - let p = param("foo=\"quoted value\""); - assert_eq!(p.value, Some(b"quoted value".as_slice())); - } - - #[test] - fn test_parameter_extra_whitespace() { - let p = param(" foo=bar "); - assert_eq!(p.key.0, b"foo"); - assert_eq!(p.value, Some(b"bar".as_slice())); - } - - #[test] - fn test_parameter_internal_key_whitespace() { - let (p, rest) = Parameter::parse("foo bar=baz".as_bytes()); - let p = p.unwrap(); - assert_eq!(p.key.0, b"foo"); - assert_eq!(p.value, None); - assert_eq!(rest, b" bar=baz"); - } - - #[test] - fn test_parameter_pathological() { - // valid things that certified insane people would do - - // quotes don't get removed from keys - let p = param("\"\"\""); - assert_eq!(p.key.0, b"\"\"\""); - - // quotes only get stripped from the absolute ends of values - let p = param("foo=\"internal\"quotes\"are\"ok\""); - assert_eq!(p.value, Some(b"internal\"quotes\"are\"ok".as_slice())); - - // non-UTF8 things are in fact valid - let non_utf8_byte = b"\xff"; - #[allow(invalid_from_utf8)] - let failed_conversion = str::from_utf8(non_utf8_byte); - assert!(failed_conversion.is_err()); - let mut p = b"foo=".to_vec(); - p.push(non_utf8_byte[0]); - let (p, _rest) = Parameter::parse(&p); - let p = p.unwrap(); - assert_eq!(p.value, Some(non_utf8_byte.as_slice())); - } - - #[test] - fn test_parameter_equality() { - // substrings are not equal - let foo = param("foo"); - let bar = param("foobar"); - assert_ne!(foo, bar); - assert_ne!(bar, foo); - - // dashes and underscores are treated equally - let dashes = param("a-delimited-param"); - let underscores = param("a_delimited_param"); - assert_eq!(dashes, underscores); - - // same key, same values is equal - let dashes = param("a-delimited-param=same_values"); - let underscores = param("a_delimited_param=same_values"); - assert_eq!(dashes, underscores); - - // same key, different values is not equal - let dashes = param("a-delimited-param=different_values"); - let underscores = param("a_delimited_param=DiFfErEnT_valUEZ"); - assert_ne!(dashes, underscores); - - // mixed variants are never equal - let switch = param("same_key"); - let keyvalue = param("same_key=but_with_a_value"); - assert_ne!(switch, keyvalue); - } - - #[test] - fn test_kargs_simple() { - // example taken lovingly from: - // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/params.c?id=89748acdf226fd1a8775ff6fa2703f8412b286c8#n160 - let kargs = Cmdline::from(b"foo=bar,bar2 baz=fuz wiz".as_slice()); - let mut iter = kargs.iter(); - - assert_eq!(iter.next(), Some(param("foo=bar,bar2"))); - assert_eq!(iter.next(), Some(param("baz=fuz"))); - assert_eq!(iter.next(), Some(param("wiz"))); - assert_eq!(iter.next(), None); - - // Test the find API - assert_eq!(kargs.find("foo").unwrap().value.unwrap(), b"bar,bar2"); - assert!(kargs.find("nothing").is_none()); - } - - #[test] - fn test_kargs_from_proc() { - let kargs = Cmdline::from_proc().unwrap(); - - // Not really a good way to test this other than assume - // there's at least one argument in /proc/cmdline wherever the - // tests are running - assert!(kargs.iter().count() > 0); - } - - #[test] - fn test_kargs_find_dash_hyphen() { - let kargs = Cmdline::from(b"a-b=1 a_b=2".as_slice()); - // find should find the first one, which is a-b=1 - let p = kargs.find("a_b").unwrap(); - assert_eq!(p.key.0, b"a-b"); - assert_eq!(p.value.unwrap(), b"1"); - let p = kargs.find("a-b").unwrap(); - assert_eq!(p.key.0, b"a-b"); - assert_eq!(p.value.unwrap(), b"1"); - - let kargs = Cmdline::from(b"a_b=2 a-b=1".as_slice()); - // find should find the first one, which is a_b=2 - let p = kargs.find("a_b").unwrap(); - assert_eq!(p.key.0, b"a_b"); - assert_eq!(p.value.unwrap(), b"2"); - let p = kargs.find("a-b").unwrap(); - assert_eq!(p.key.0, b"a_b"); - assert_eq!(p.value.unwrap(), b"2"); - } - - #[test] - fn test_kargs_extra_whitespace() { - let kargs = Cmdline::from(b" foo=bar baz=fuz wiz ".as_slice()); - let mut iter = kargs.iter(); - - assert_eq!(iter.next(), Some(param("foo=bar"))); - assert_eq!(iter.next(), Some(param("baz=fuz"))); - assert_eq!(iter.next(), Some(param("wiz"))); - assert_eq!(iter.next(), None); - } - - #[test] - fn test_value_of() { - let kargs = Cmdline::from(b"foo=bar baz=qux switch".as_slice()); - - // Test existing key with value - assert_eq!(kargs.value_of("foo"), Some(b"bar".as_slice())); - assert_eq!(kargs.value_of("baz"), Some(b"qux".as_slice())); - - // Test key without value - assert_eq!(kargs.value_of("switch"), None); - - // Test non-existent key - assert_eq!(kargs.value_of("missing"), None); - - // Test dash/underscore equivalence - let kargs = Cmdline::from(b"dash-key=value1 under_key=value2".as_slice()); - assert_eq!(kargs.value_of("dash_key"), Some(b"value1".as_slice())); - assert_eq!(kargs.value_of("under-key"), Some(b"value2".as_slice())); - } - - #[test] - fn test_value_of_utf8() { - let kargs = Cmdline::from(b"foo=bar baz=qux switch".as_slice()); - - // Test existing key with UTF-8 value - assert_eq!(kargs.value_of_utf8("foo").unwrap(), Some("bar")); - assert_eq!(kargs.value_of_utf8("baz").unwrap(), Some("qux")); - - // Test key without value - assert_eq!(kargs.value_of_utf8("switch").unwrap(), None); - - // Test non-existent key - assert_eq!(kargs.value_of_utf8("missing").unwrap(), None); - - // Test dash/underscore equivalence - let kargs = Cmdline::from(b"dash-key=value1 under_key=value2".as_slice()); - assert_eq!(kargs.value_of_utf8("dash_key").unwrap(), Some("value1")); - assert_eq!(kargs.value_of_utf8("under-key").unwrap(), Some("value2")); - - // Test invalid UTF-8 - let mut invalid_utf8 = b"invalid=".to_vec(); - invalid_utf8.push(0xff); - let kargs = Cmdline::from(&invalid_utf8); - assert!(kargs.value_of_utf8("invalid").is_err()); - } - - #[test] - fn test_require_value_of() { - let kargs = Cmdline::from(b"foo=bar baz=qux switch".as_slice()); - - // Test existing key with value - assert_eq!(kargs.require_value_of("foo").unwrap(), b"bar"); - assert_eq!(kargs.require_value_of("baz").unwrap(), b"qux"); - - // Test key without value should fail - let err = kargs.require_value_of("switch").unwrap_err(); - assert!(err - .to_string() - .contains("Failed to find kernel argument 'switch'")); - - // Test non-existent key should fail - let err = kargs.require_value_of("missing").unwrap_err(); - assert!(err - .to_string() - .contains("Failed to find kernel argument 'missing'")); - - // Test dash/underscore equivalence - let kargs = Cmdline::from(b"dash-key=value1 under_key=value2".as_slice()); - assert_eq!(kargs.require_value_of("dash_key").unwrap(), b"value1"); - assert_eq!(kargs.require_value_of("under-key").unwrap(), b"value2"); - } - - #[test] - fn test_require_value_of_utf8() { - let kargs = Cmdline::from(b"foo=bar baz=qux switch".as_slice()); - - // Test existing key with UTF-8 value - assert_eq!(kargs.require_value_of_utf8("foo").unwrap(), "bar"); - assert_eq!(kargs.require_value_of_utf8("baz").unwrap(), "qux"); - - // Test key without value should fail - let err = kargs.require_value_of_utf8("switch").unwrap_err(); - assert!(err - .to_string() - .contains("Failed to find kernel argument 'switch'")); - - // Test non-existent key should fail - let err = kargs.require_value_of_utf8("missing").unwrap_err(); - assert!(err - .to_string() - .contains("Failed to find kernel argument 'missing'")); - - // Test dash/underscore equivalence - let kargs = Cmdline::from(b"dash-key=value1 under_key=value2".as_slice()); - assert_eq!(kargs.require_value_of_utf8("dash_key").unwrap(), "value1"); - assert_eq!(kargs.require_value_of_utf8("under-key").unwrap(), "value2"); - - // Test invalid UTF-8 should fail - let mut invalid_utf8 = b"invalid=".to_vec(); - invalid_utf8.push(0xff); - let kargs = Cmdline::from(&invalid_utf8); - assert!(kargs.require_value_of_utf8("invalid").is_err()); - } - - #[test] - fn test_find_str() { - let kargs = Cmdline::from(b"foo=bar baz=qux switch rd.break".as_slice()); - let p = kargs.find_str("foo").unwrap(); - assert_eq!(p, ParameterStr::from("foo=bar")); - assert_eq!(p.as_ref(), "foo=bar"); - let p = kargs.find_str("rd.break").unwrap(); - assert_eq!(p, ParameterStr::from("rd.break")); - assert!(kargs.find_str("missing").is_none()); - } - - #[test] - fn test_find_all_str() { - let kargs = - Cmdline::from(b"foo=bar rd.foo=a rd.bar=b rd.baz rd.qux=c notrd.val=d".as_slice()); - let mut rd_args: Vec<_> = kargs.find_all_starting_with_str("rd.").collect(); - rd_args.sort_by(|a, b| a.key.0.cmp(b.key.0)); - assert_eq!(rd_args.len(), 4); - assert_eq!(rd_args[0], ParameterStr::from("rd.bar=b")); - assert_eq!(rd_args[1], ParameterStr::from("rd.baz")); - assert_eq!(rd_args[2], ParameterStr::from("rd.foo=a")); - assert_eq!(rd_args[3], ParameterStr::from("rd.qux=c")); - } - - #[test] - fn test_param_to_str() { - let p = param("foo=bar"); - let p_str = p.to_str().unwrap(); - assert_eq!(p_str, ParameterStr::from("foo=bar")); - let non_utf8_byte = b"\xff"; - let mut p_u8 = b"foo=".to_vec(); - p_u8.push(non_utf8_byte[0]); - let (p, _rest) = Parameter::parse(&p_u8); - let p = p.unwrap(); - assert!(p.to_str().is_none()); - } - - #[test] - fn test_param_key_str_eq() { - let k1 = ParameterKeyStr("a-b"); - let k2 = ParameterKeyStr("a_b"); - assert_eq!(k1, k2); - let k1 = ParameterKeyStr("a-b"); - let k2 = ParameterKeyStr("a-c"); - assert_ne!(k1, k2); - } - - #[test] - fn test_kargs_non_utf8() { - let non_utf8_val = b"an_invalid_key=\xff"; - let mut kargs_bytes = b"foo=bar ".to_vec(); - kargs_bytes.extend_from_slice(non_utf8_val); - kargs_bytes.extend_from_slice(b" baz=qux"); - let kargs = Cmdline::from(kargs_bytes.as_slice()); - - // We should be able to find the valid kargs - assert_eq!(kargs.find_str("foo").unwrap().value, Some("bar")); - assert_eq!(kargs.find_str("baz").unwrap().value, Some("qux")); - - // But we should not find the invalid one via find_str - assert!(kargs.find("an_invalid_key").unwrap().to_str().is_none()); - - // And even using the raw find, trying to convert it to_str will fail. - let raw_param = kargs.find("an_invalid_key").unwrap(); - assert_eq!(raw_param.value.unwrap(), b"\xff"); - } -} +pub mod bytes; +pub mod utf8; diff --git a/crates/kernel_cmdline/src/utf8.rs b/crates/kernel_cmdline/src/utf8.rs new file mode 100644 index 000000000..84fff96aa --- /dev/null +++ b/crates/kernel_cmdline/src/utf8.rs @@ -0,0 +1,528 @@ +//! UTF-8-based kernel command line parsing utilities. +//! +//! This module provides functionality for parsing and working with kernel command line +//! arguments, supporting both key-only switches and key-value pairs with proper quote handling. + +use std::ops::Deref; + +use crate::bytes; + +use anyhow::Result; + +/// A parsed UTF-8 kernel command line. +/// +/// Wraps the raw command line bytes and provides methods for parsing and iterating +/// over individual parameters. Uses copy-on-write semantics to avoid unnecessary +/// allocations when working with borrowed data. +#[derive(Debug)] +pub struct Cmdline<'a>(bytes::Cmdline<'a>); + +impl<'a, T: AsRef + ?Sized> From<&'a T> for Cmdline<'a> { + /// Creates a new `Cmdline` from any type that can be referenced as `str`. + /// + /// Uses borrowed data when possible to avoid unnecessary allocations. + fn from(input: &'a T) -> Self { + Self(bytes::Cmdline::from(input.as_ref().as_bytes())) + } +} + +/// An iterator over UTF-8 kernel command line parameters. +/// +/// This is created by the `iter` method on `CmdlineUTF8`. +#[derive(Debug)] +pub struct CmdlineIter<'a>(bytes::CmdlineIter<'a>); + +impl<'a> Iterator for CmdlineIter<'a> { + type Item = Parameter<'a>; + + fn next(&mut self) -> Option { + self.0.next().map(Parameter::from_bytes) + } +} + +impl<'a> Cmdline<'a> { + /// Reads the kernel command line from `/proc/cmdline`. + /// + /// Returns an error if: + /// - The file cannot be read + /// - There are I/O issues + /// - The cmdline from proc is not valid UTF-8 + pub fn from_proc() -> Result { + let cmdline = std::fs::read("/proc/cmdline")?; + + // SAFETY: validate the value from proc is valid UTF-8. We + // don't need to save this, but checking now will ensure we + // can safely convert from the underlying bytes back to UTF-8 + // later. + str::from_utf8(&cmdline)?; + + Ok(Self(bytes::Cmdline::from(cmdline))) + } + + /// Returns an iterator over all parameters in the command line. + /// + /// Properly handles quoted values containing whitespace and splits on + /// unquoted whitespace characters. Parameters are parsed as either + /// key-only switches or key=value pairs. + pub fn iter(&'a self) -> CmdlineIter<'a> { + CmdlineIter(self.0.iter()) + } + + /// Locate a kernel argument with the given key name. + /// + /// Returns the first parameter matching the given key, or `None` if not found. + /// Key comparison treats dashes and underscores as equivalent. + pub fn find(&'a self, key: impl AsRef) -> Option> { + let key = ParameterKey::from(key.as_ref()); + self.iter().find(|p| p.key() == key) + } + + /// Find all kernel arguments starting with the given UTF-8 prefix. + /// + /// This is a variant of [`Self::find`]. + pub fn find_all_starting_with( + &'a self, + prefix: &'a str, + ) -> impl Iterator> + 'a { + self.iter().filter(move |p| p.key().starts_with(prefix)) + } + + /// Locate the value of the kernel argument with the given key name. + /// + /// Returns the first value matching the given key, or `None` if not found. + /// Key comparison treats dashes and underscores as equivalent. + pub fn value_of(&'a self, key: impl AsRef) -> Option<&'a str> { + self.0.value_of(key.as_ref().as_bytes()).map(|v| { + // SAFETY: We know this is valid UTF-8 since we only + // construct the underlying `bytes` from valid UTF-8 + str::from_utf8(v).expect("We only construct the underlying bytes from valid UTF-8") + }) + } + + /// Find the value of the kernel argument with the provided name, which must be present. + /// + /// Otherwise the same as [`Self::value_of`]. + pub fn require_value_of(&'a self, key: impl AsRef) -> Result<&'a str> { + let key = key.as_ref(); + self.value_of(key) + .ok_or_else(|| anyhow::anyhow!("Failed to find kernel argument '{key}'")) + } +} + +/// A single kernel command line parameter key +/// +/// Handles quoted values and treats dashes and underscores in keys as equivalent. +#[derive(Clone, Debug, Eq)] +pub struct ParameterKey<'a>(bytes::ParameterKey<'a>); + +impl<'a> std::ops::Deref for ParameterKey<'a> { + type Target = str; + + fn deref(&self) -> &Self::Target { + // SAFETY: We know this is valid UTF-8 since we only + // construct the underlying `bytes` from valid UTF-8 + str::from_utf8(&self.0).expect("We only construct the underlying bytes from valid UTF-8") + } +} + +impl<'a> ParameterKey<'a> { + /// Construct a utf8::ParameterKey from a bytes::ParameterKey + /// + /// This is non-public and should only be used when the underlying + /// bytes are known to be valid UTF-8. + fn from_bytes(input: bytes::ParameterKey<'a>) -> Self { + Self(input) + } +} + +impl<'a> From<&'a str> for ParameterKey<'a> { + fn from(input: &'a str) -> Self { + Self(bytes::ParameterKey(input.as_bytes())) + } +} + +impl<'a> std::fmt::Display for ParameterKey<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + let as_str: &str = self; + write!(f, "{as_str}") + } +} + +impl PartialEq for ParameterKey<'_> { + /// Compares two parameter keys for equality. + /// + /// Keys are compared with dashes and underscores treated as equivalent. + /// This comparison is case-sensitive. + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +/// A single kernel command line parameter. +#[derive(Debug, Eq)] +pub struct Parameter<'a>(bytes::Parameter<'a>); + +impl<'a> Parameter<'a> { + /// Attempt to parse a single command line parameter from a UTF-8 + /// string. + /// + /// The first tuple item contains the parsed parameter, or `None` + /// if a Parameter could not be constructed from the input. This + /// occurs when the input is either empty or contains only + /// whitespace. + /// + /// Any remaining characters not consumed from the input are + /// returned as the second tuple item. + pub fn parse(input: &'a str) -> (Option, &'a str) { + let (bytes, rest) = bytes::Parameter::parse(input.as_bytes()); + + // SAFETY: we know this is valid UTF-8 since input is &str, + // and `rest` is a subslice of that &str which was split on + // whitespace + let rest = str::from_utf8(rest) + .expect("Splitting UTF-8 on ascii whitespace cannot produce invalid UTF-8 substrings"); + + match bytes { + Some(p) => (Some(Self(p)), rest), + None => (None, rest), + } + } + + /// Construct a utf8::Parameter from a bytes::Parameter + /// + /// This is non-public and should only be used when the underlying + /// bytes are known to be valid UTF-8. + fn from_bytes(bytes: bytes::Parameter<'a>) -> Self { + Self(bytes) + } + + /// Returns the key part of the parameter + pub fn key(&'a self) -> ParameterKey<'a> { + ParameterKey::from_bytes(self.0.key()) + } + + /// Returns the optional value part of the parameter + pub fn value(&'a self) -> Option<&'a str> { + self.0.value().map(|p| { + // SAFETY: We know this is valid UTF-8 since we only + // construct the underlying `bytes` from valid UTF-8 + str::from_utf8(p).expect("We only construct the underlying bytes from valid UTF-8") + }) + } +} + +impl<'a> TryFrom> for Parameter<'a> { + type Error = anyhow::Error; + + fn try_from(bytes: bytes::Parameter<'a>) -> Result { + if str::from_utf8(bytes.key().deref()).is_err() { + anyhow::bail!("Parameter key is not valid UTF-8"); + } + + if let Some(value) = bytes.value() { + if str::from_utf8(value).is_err() { + anyhow::bail!("Parameter value is not valid UTF-8"); + } + } + + Ok(Self(bytes)) + } +} + +impl<'a> std::fmt::Display for Parameter<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + match self.value() { + Some(v) => { + if v.contains(|ch: char| ch.is_ascii_whitespace()) { + write!(f, "{}=\"{}\"", self.key(), v) + } else { + write!(f, "{}={}", self.key(), v) + } + } + None => write!(f, "{}", self.key()), + } + } +} + +impl<'a> PartialEq for Parameter<'a> { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // convenience method for tests + fn param(s: &str) -> Parameter<'_> { + Parameter::parse(s).0.unwrap() + } + + #[test] + fn test_parameter_parse() { + let (p, rest) = Parameter::parse("foo"); + let p = p.unwrap(); + assert_eq!(p.key(), "foo".into()); + assert_eq!(p.value(), None); + assert_eq!(rest, ""); + + // should consume one parameter and return the rest of the input + let (p, rest) = Parameter::parse("foo=bar baz"); + let p = p.unwrap(); + assert_eq!(p.key(), "foo".into()); + assert_eq!(p.value(), Some("bar")); + assert_eq!(rest, " baz"); + + // should return None on empty or whitespace inputs + let (p, rest) = Parameter::parse(""); + assert!(p.is_none()); + assert_eq!(rest, ""); + let (p, rest) = Parameter::parse(" "); + assert!(p.is_none()); + assert_eq!(rest, ""); + } + + #[test] + fn test_parameter_simple() { + let switch = param("foo"); + assert_eq!(switch.key(), "foo".into()); + assert_eq!(switch.value(), None); + + let kv = param("bar=baz"); + assert_eq!(kv.key(), "bar".into()); + assert_eq!(kv.value(), Some("baz")); + } + + #[test] + fn test_parameter_quoted() { + let p = param("foo=\"quoted value\""); + assert_eq!(p.value(), Some("quoted value")); + } + + #[test] + fn test_parameter_extra_whitespace() { + let p = param(" foo=bar "); + assert_eq!(p.key(), "foo".into()); + assert_eq!(p.value(), Some("bar")); + } + + #[test] + fn test_parameter_internal_key_whitespace() { + let (p, rest) = Parameter::parse("foo bar=baz"); + let p = p.unwrap(); + assert_eq!(p.key(), "foo".into()); + assert_eq!(p.value(), None); + assert_eq!(rest, " bar=baz"); + } + + #[test] + fn test_parameter_pathological() { + // valid things that certified insane people would do + + // quotes don't get removed from keys + let p = param("\"\"\""); + assert_eq!(p.key(), "\"\"\"".into()); + + // quotes only get stripped from the absolute ends of values + let p = param("foo=\"internal\"quotes\"are\"ok\""); + assert_eq!(p.value(), Some("internal\"quotes\"are\"ok")); + } + + #[test] + fn test_parameter_equality() { + // substrings are not equal + let foo = param("foo"); + let bar = param("foobar"); + assert_ne!(foo, bar); + assert_ne!(bar, foo); + + // dashes and underscores are treated equally + let dashes = param("a-delimited-param"); + let underscores = param("a_delimited_param"); + assert_eq!(dashes, underscores); + + // same key, same values is equal + let dashes = param("a-delimited-param=same_values"); + let underscores = param("a_delimited_param=same_values"); + assert_eq!(dashes, underscores); + + // same key, different values is not equal + let dashes = param("a-delimited-param=different_values"); + let underscores = param("a_delimited_param=DiFfErEnT_valUEZ"); + assert_ne!(dashes, underscores); + + // mixed variants are never equal + let switch = param("same_key"); + let keyvalue = param("same_key=but_with_a_value"); + assert_ne!(switch, keyvalue); + } + + #[test] + fn test_parameter_tryfrom() { + // ok switch + let p = bytes::Parameter::parse(b"foo").0.unwrap(); + let utf = Parameter::try_from(p).unwrap(); + assert_eq!(utf.key(), "foo".into()); + assert_eq!(utf.value(), None); + + // ok key/value + let p = bytes::Parameter::parse(b"foo=bar").0.unwrap(); + let utf = Parameter::try_from(p).unwrap(); + assert_eq!(utf.key(), "foo".into()); + assert_eq!(utf.value(), Some("bar".into())); + + // bad switch + let p = bytes::Parameter::parse(b"f\xffoo").0.unwrap(); + let e = Parameter::try_from(p); + assert_eq!( + e.unwrap_err().to_string(), + "Parameter key is not valid UTF-8" + ); + + // bad key/value + let p = bytes::Parameter::parse(b"foo=b\xffar").0.unwrap(); + let e = Parameter::try_from(p); + assert_eq!( + e.unwrap_err().to_string(), + "Parameter value is not valid UTF-8" + ); + } + + #[test] + fn test_kargs_simple() { + // example taken lovingly from: + // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/params.c?id=89748acdf226fd1a8775ff6fa2703f8412b286c8#n160 + let kargs = Cmdline::from("foo=bar,bar2 baz=fuz wiz"); + let mut iter = kargs.iter(); + + assert_eq!(iter.next(), Some(param("foo=bar,bar2"))); + assert_eq!(iter.next(), Some(param("baz=fuz"))); + assert_eq!(iter.next(), Some(param("wiz"))); + assert_eq!(iter.next(), None); + + // Test the find API + assert_eq!(kargs.find("foo").unwrap().value().unwrap(), "bar,bar2"); + assert!(kargs.find("nothing").is_none()); + } + + #[test] + fn test_kargs_from_proc() { + let kargs = Cmdline::from_proc().unwrap(); + + // Not really a good way to test this other than assume + // there's at least one argument in /proc/cmdline wherever the + // tests are running + assert!(kargs.iter().count() > 0); + } + + #[test] + fn test_kargs_find_dash_hyphen() { + let kargs = Cmdline::from("a-b=1 a_b=2"); + // find should find the first one, which is a-b=1 + let p = kargs.find("a_b").unwrap(); + assert_eq!(p.key(), "a-b".into()); + assert_eq!(p.value().unwrap(), "1"); + let p = kargs.find("a-b").unwrap(); + assert_eq!(p.key(), "a-b".into()); + assert_eq!(p.value().unwrap(), "1"); + + let kargs = Cmdline::from("a_b=2 a-b=1"); + // find should find the first one, which is a_b=2 + let p = kargs.find("a_b").unwrap(); + assert_eq!(p.key(), "a_b".into()); + assert_eq!(p.value().unwrap(), "2"); + let p = kargs.find("a-b").unwrap(); + assert_eq!(p.key(), "a_b".into()); + assert_eq!(p.value().unwrap(), "2"); + } + + #[test] + fn test_kargs_extra_whitespace() { + let kargs = Cmdline::from(" foo=bar baz=fuz wiz "); + let mut iter = kargs.iter(); + + assert_eq!(iter.next(), Some(param("foo=bar"))); + assert_eq!(iter.next(), Some(param("baz=fuz"))); + assert_eq!(iter.next(), Some(param("wiz"))); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_value_of() { + let kargs = Cmdline::from("foo=bar baz=qux switch"); + + // Test existing key with value + assert_eq!(kargs.value_of("foo"), Some("bar")); + assert_eq!(kargs.value_of("baz"), Some("qux")); + + // Test key without value + assert_eq!(kargs.value_of("switch"), None); + + // Test non-existent key + assert_eq!(kargs.value_of("missing"), None); + + // Test dash/underscore equivalence + let kargs = Cmdline::from("dash-key=value1 under_key=value2"); + assert_eq!(kargs.value_of("dash_key"), Some("value1")); + assert_eq!(kargs.value_of("under-key"), Some("value2")); + } + + #[test] + fn test_require_value_of() { + let kargs = Cmdline::from("foo=bar baz=qux switch"); + + // Test existing key with value + assert_eq!(kargs.require_value_of("foo").unwrap(), "bar"); + assert_eq!(kargs.require_value_of("baz").unwrap(), "qux"); + + // Test key without value should fail + let err = kargs.require_value_of("switch").unwrap_err(); + assert!(err + .to_string() + .contains("Failed to find kernel argument 'switch'")); + + // Test non-existent key should fail + let err = kargs.require_value_of("missing").unwrap_err(); + assert!(err + .to_string() + .contains("Failed to find kernel argument 'missing'")); + + // Test dash/underscore equivalence + let kargs = Cmdline::from("dash-key=value1 under_key=value2"); + assert_eq!(kargs.require_value_of("dash_key").unwrap(), "value1"); + assert_eq!(kargs.require_value_of("under-key").unwrap(), "value2"); + } + + #[test] + fn test_find_str() { + let kargs = Cmdline::from("foo=bar baz=qux switch rd.break"); + let p = kargs.find("foo").unwrap(); + assert_eq!(p, param("foo=bar")); + let p = kargs.find("rd.break").unwrap(); + assert_eq!(p, param("rd.break")); + assert!(kargs.find("missing").is_none()); + } + + #[test] + fn test_find_all_str() { + let kargs = Cmdline::from("foo=bar rd.foo=a rd.bar=b rd.baz rd.qux=c notrd.val=d"); + let mut rd_args: Vec<_> = kargs.find_all_starting_with("rd.").collect(); + rd_args.sort_by(|a, b| a.key().cmp(&b.key())); + assert_eq!(rd_args.len(), 4); + assert_eq!(rd_args[0], param("rd.bar=b")); + assert_eq!(rd_args[1], param("rd.baz")); + assert_eq!(rd_args[2], param("rd.foo=a")); + assert_eq!(rd_args[3], param("rd.qux=c")); + } + + #[test] + fn test_param_key_eq() { + let k1 = ParameterKey::from("a-b"); + let k2 = ParameterKey::from("a_b"); + assert_eq!(k1, k2); + let k1 = ParameterKey::from("a-b"); + let k2 = ParameterKey::from("a-c"); + assert_ne!(k1, k2); + } +} diff --git a/crates/lib/src/install.rs b/crates/lib/src/install.rs index 156270dd2..79589a83f 100644 --- a/crates/lib/src/install.rs +++ b/crates/lib/src/install.rs @@ -62,7 +62,7 @@ use crate::spec::ImageReference; use crate::store::Storage; use crate::task::Task; use crate::utils::sigpolicy_from_opt; -use bootc_kernel_cmdline::Cmdline; +use bootc_kernel_cmdline::{bytes, utf8}; use bootc_mount::Filesystem; /// The toplevel boot directory @@ -83,6 +83,10 @@ const SELINUXFS: &str = "/sys/fs/selinux"; /// The mount path for uefi const EFIVARFS: &str = "/sys/firmware/efi/efivars"; pub(crate) const ARCH_USES_EFI: bool = cfg!(any(target_arch = "x86_64", target_arch = "aarch64")); +/// This is used by dracut. +pub const INITRD_ARG_PREFIX: &str = "rd."; +/// The kernel argument for configuring the rootfs flags. +pub const ROOTFLAGS: &str = "rootflags"; const DEFAULT_REPO_CONFIG: &[(&str, &str)] = &[ // Default to avoiding grub2-mkconfig etc. @@ -1653,22 +1657,24 @@ struct RootMountInfo { /// Discover how to mount the root filesystem, using existing kernel arguments and information /// about the root mount. -fn find_root_args_to_inherit(cmdline: &Cmdline, root_info: &Filesystem) -> Result { - let root = cmdline - .value_of_utf8("root") - .context("Parsing root= karg")?; +fn find_root_args_to_inherit( + cmdline: &bytes::Cmdline, + root_info: &Filesystem, +) -> Result { // If we have a root= karg, then use that + let root = cmdline + .find_utf8("root")? + .and_then(|p| p.value().map(|p| p.to_string())); let (mount_spec, kargs) = if let Some(root) = root { - let rootflags = cmdline.find_str(bootc_kernel_cmdline::ROOTFLAGS); - let inherit_kargs = - cmdline.find_all_starting_with_str(bootc_kernel_cmdline::INITRD_ARG_PREFIX); + let rootflags = cmdline.find(ROOTFLAGS); + let inherit_kargs = cmdline.find_all_starting_with(INITRD_ARG_PREFIX); ( - root.to_owned(), + root, rootflags .into_iter() .chain(inherit_kargs) - .map(|p| p.as_ref().to_owned()) - .collect(), + .map(|p| utf8::Parameter::try_from(p).map(|p| p.to_string())) + .collect::, _>>()?, ) } else { let uuid = root_info @@ -1836,7 +1842,7 @@ pub(crate) async fn install_to_filesystem( } } else if targeting_host_root { // In the to-existing-root case, look at /proc/cmdline - let cmdline = Cmdline::from_proc()?; + let cmdline = bytes::Cmdline::from_proc()?; find_root_args_to_inherit(&cmdline, &inspect)? } else { // Otherwise, gather metadata from the provided root and use its provided UUID as a @@ -2089,18 +2095,42 @@ mod tests { uuid: Some("965eb3c7-5a3f-470d-aaa2-1bcf04334bc6".into()), children: None, }; - let kargs = Cmdline::from(""); + let kargs = bytes::Cmdline::from(""); let r = find_root_args_to_inherit(&kargs, &inspect).unwrap(); assert_eq!(r.mount_spec, "UUID=965eb3c7-5a3f-470d-aaa2-1bcf04334bc6"); - let kargs = - Cmdline::from("root=/dev/mapper/root rw someother=karg rd.lvm.lv=root systemd.debug=1"); + let kargs = bytes::Cmdline::from( + "root=/dev/mapper/root rw someother=karg rd.lvm.lv=root systemd.debug=1", + ); // In this case we take the root= from the kernel cmdline let r = find_root_args_to_inherit(&kargs, &inspect).unwrap(); assert_eq!(r.mount_spec, "/dev/mapper/root"); assert_eq!(r.kargs.len(), 1); assert_eq!(r.kargs[0], "rd.lvm.lv=root"); + + // non-UTF8 data in non-essential parts of the cmdline should be ignored + let kargs = bytes::Cmdline::from( + b"root=/dev/mapper/root rw non-utf8=\xff rd.lvm.lv=root systemd.debug=1", + ); + let r = find_root_args_to_inherit(&kargs, &inspect).unwrap(); + assert_eq!(r.mount_spec, "/dev/mapper/root"); + assert_eq!(r.kargs.len(), 1); + assert_eq!(r.kargs[0], "rd.lvm.lv=root"); + + // non-UTF8 data in `root` should fail + let kargs = bytes::Cmdline::from( + b"root=/dev/mapper/ro\xffot rw non-utf8=\xff rd.lvm.lv=root systemd.debug=1", + ); + let r = find_root_args_to_inherit(&kargs, &inspect); + assert!(r.is_err()); + + // non-UTF8 data in `rd.` should fail + let kargs = bytes::Cmdline::from( + b"root=/dev/mapper/root rw non-utf8=\xff rd.lvm.lv=ro\xffot systemd.debug=1", + ); + let r = find_root_args_to_inherit(&kargs, &inspect); + assert!(r.is_err()); } // As this is a unit test we don't try to test mountpoints, just verify