Skip to content

Commit 213f879

Browse files
committed
kernel_cmdline: Drop to_lossy, add more utf8 variants
I don't think anything should use to_lossy() by default. It's great to have a correct kernel argument parser that doesn't bomb on non-UTF8 but at the same time in our code we can just I think ignore kernel arguments which aren't UTF-8. Maybe we should warn if e.g. we find a `root=<nonutf8` or so but eh. Everything else in the bootc codebase works in terms of strings so let's just make it really easy to only get strings out. Implementation notes: - I struggled with lifetimes in this one and couldn't get it to work to reuse the Parameter (byte oriented) parser and just reimplemented it in the str path - When I tossed this problem at both Claude and Gemini they both gave up; and Gemini ended up deleting all the code and declaring success Unit tests (after I manually fixed up all the lifetime stuff in the core code) are Assisted-by: Gemini-CLI Signed-off-by: Colin Walters <[email protected]>
1 parent 664f06e commit 213f879

File tree

2 files changed

+152
-43
lines changed

2 files changed

+152
-43
lines changed

crates/lib/src/install.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,17 +1668,15 @@ fn find_root_args_to_inherit(cmdline: &Cmdline, root_info: &Filesystem) -> Resul
16681668
.context("Parsing root= karg")?;
16691669
// If we have a root= karg, then use that
16701670
let (mount_spec, kargs) = if let Some(root) = root {
1671-
let rootflags = cmdline.find(crate::kernel_cmdline::ROOTFLAGS);
1672-
let inherit_kargs = cmdline.iter().filter(|arg| {
1673-
arg.key
1674-
.starts_with(crate::kernel_cmdline::INITRD_ARG_PREFIX)
1675-
});
1671+
let rootflags = cmdline.find_str(crate::kernel_cmdline::ROOTFLAGS);
1672+
let inherit_kargs =
1673+
cmdline.find_all_starting_with_str(crate::kernel_cmdline::INITRD_ARG_PREFIX);
16761674
(
16771675
root.to_owned(),
16781676
rootflags
16791677
.into_iter()
16801678
.chain(inherit_kargs)
1681-
.map(|p| p.to_string())
1679+
.map(|p| p.as_ref().to_owned())
16821680
.collect(),
16831681
)
16841682
} else {

crates/lib/src/kernel_cmdline.rs

Lines changed: 148 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ use std::borrow::Cow;
88
use anyhow::Result;
99

1010
/// This is used by dracut.
11-
pub(crate) const INITRD_ARG_PREFIX: &[u8] = b"rd.";
11+
pub(crate) const INITRD_ARG_PREFIX: &str = "rd.";
1212
/// The kernel argument for configuring the rootfs flags.
13-
pub(crate) const ROOTFLAGS: &[u8] = b"rootflags";
13+
pub(crate) const ROOTFLAGS: &str = "rootflags";
1414

1515
/// A parsed kernel command line.
1616
///
@@ -41,7 +41,7 @@ impl<'a> Cmdline<'a> {
4141
/// Properly handles quoted values containing whitespace and splits on
4242
/// unquoted whitespace characters. Parameters are parsed as either
4343
/// key-only switches or key=value pairs.
44-
pub fn iter(&'a self) -> impl Iterator<Item = Parameter<'a>> {
44+
pub fn iter(&'a self) -> impl Iterator<Item = Parameter<'a>> + 'a {
4545
let mut in_quotes = false;
4646

4747
self.0
@@ -63,6 +63,29 @@ impl<'a> Cmdline<'a> {
6363
self.iter().find(|p| p.key == key)
6464
}
6565

66+
/// Locate a kernel argument with the given key name that must be UTF-8.
67+
///
68+
/// Otherwise the same as [`Self::find`].
69+
pub fn find_str<'k: 'a>(&'a self, key: &'k str) -> Option<ParameterStr<'a>> {
70+
let key = ParameterKeyStr(key);
71+
self.iter()
72+
.filter_map(|p| p.to_str())
73+
.find(move |p| p.key == key)
74+
}
75+
76+
/// Find all kernel arguments starting with the given prefix which must be UTF-8.
77+
/// Non-UTF8 values are ignored.
78+
///
79+
/// This is a variant of [`Self::find`].
80+
pub fn find_all_starting_with_str<'b>(
81+
&'a self,
82+
prefix: &'a str,
83+
) -> impl Iterator<Item = ParameterStr<'a>> {
84+
self.iter()
85+
.filter_map(|p| p.to_str())
86+
.filter(move |p| p.key.0.starts_with(prefix))
87+
}
88+
6689
/// Locate the value of the kernel argument with the given key name.
6790
///
6891
/// Returns the first value matching the given key, or `None` if not found.
@@ -121,6 +144,18 @@ impl<'a> From<&'a [u8]> for ParameterKey<'a> {
121144
}
122145
}
123146

147+
/// A single kernel command line parameter key that is known to be UTF-8.
148+
///
149+
/// Otherwise the same as [`ParameterKey`].
150+
#[derive(Debug, Eq)]
151+
pub(crate) struct ParameterKeyStr<'a>(&'a str);
152+
153+
impl<'a> From<&'a str> for ParameterKeyStr<'a> {
154+
fn from(value: &'a str) -> Self {
155+
Self(value)
156+
}
157+
}
158+
124159
/// A single kernel command line parameter.
125160
#[derive(Debug, Eq)]
126161
pub(crate) struct Parameter<'a> {
@@ -133,20 +168,29 @@ pub(crate) struct Parameter<'a> {
133168
pub value: Option<&'a [u8]>,
134169
}
135170

171+
/// A single kernel command line parameter.
172+
#[derive(Debug, PartialEq, Eq)]
173+
pub(crate) struct ParameterStr<'a> {
174+
/// The original value
175+
pub parameter: &'a str,
176+
/// The parameter key
177+
pub key: ParameterKeyStr<'a>,
178+
/// The parameter value, if present
179+
pub value: Option<&'a str>,
180+
}
181+
136182
impl<'a> Parameter<'a> {
137-
/// Returns the key as a lossy UTF-8 string.
138-
///
139-
/// Invalid UTF-8 sequences are replaced with the Unicode replacement character.
140-
pub fn key_lossy(&self) -> String {
141-
String::from_utf8_lossy(&self.key).to_string()
183+
pub fn to_str(self) -> Option<ParameterStr<'a>> {
184+
let Ok(parameter) = std::str::from_utf8(self.parameter) else {
185+
return None;
186+
};
187+
Some(ParameterStr::from(parameter))
142188
}
189+
}
143190

144-
/// Returns the value as a lossy UTF-8 string.
145-
///
146-
/// Invalid UTF-8 sequences are replaced with the Unicode replacement character.
147-
/// Returns an empty string if no value is present.
148-
pub fn value_lossy(&self) -> String {
149-
String::from_utf8_lossy(self.value.unwrap_or(&[])).to_string()
191+
impl<'a> AsRef<str> for ParameterStr<'a> {
192+
fn as_ref(&self) -> &str {
193+
self.parameter
150194
}
151195
}
152196

@@ -215,33 +259,37 @@ impl PartialEq for ParameterKey<'_> {
215259
}
216260
}
217261

262+
impl<'a> From<&'a str> for ParameterStr<'a> {
263+
fn from(parameter: &'a str) -> Self {
264+
let (key, value) = if let Some((key, value)) = parameter.split_once('=') {
265+
let value = value
266+
.strip_prefix('"')
267+
.unwrap_or(value)
268+
.strip_suffix('"')
269+
.unwrap_or(value);
270+
(key, Some(value))
271+
} else {
272+
(parameter, None)
273+
};
274+
let key = ParameterKeyStr(key);
275+
ParameterStr {
276+
parameter,
277+
key,
278+
value,
279+
}
280+
}
281+
}
282+
218283
impl<'a> PartialEq for Parameter<'a> {
219284
fn eq(&self, other: &Self) -> bool {
220285
// Note we don't compare parameter because we want hyphen-dash insensitivity for the key
221286
self.key == other.key && self.value == other.value
222287
}
223288
}
224289

225-
impl std::fmt::Display for Parameter<'_> {
226-
/// Formats the parameter for display.
227-
///
228-
/// Key-only parameters are displayed as just the key.
229-
/// Key-value parameters are displayed as `key=value`.
230-
/// Values containing whitespace are automatically quoted.
231-
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
232-
let key = self.key_lossy();
233-
234-
if self.value.is_some() {
235-
let value = self.value_lossy();
236-
237-
if value.chars().any(|c| c.is_ascii_whitespace()) {
238-
write!(f, "{key}=\"{value}\"")
239-
} else {
240-
write!(f, "{key}={value}")
241-
}
242-
} else {
243-
write!(f, "{key}")
244-
}
290+
impl<'a> PartialEq for ParameterKeyStr<'a> {
291+
fn eq(&self, other: &Self) -> bool {
292+
ParameterKey(self.0.as_bytes()) == ParameterKey(other.0.as_bytes())
245293
}
246294
}
247295

@@ -287,9 +335,6 @@ mod tests {
287335
p.push(non_utf8_byte[0]);
288336
let p = Parameter::from(&p);
289337
assert_eq!(p.value, Some(non_utf8_byte.as_slice()));
290-
291-
// lossy replacement sanity check
292-
assert_eq!(p.value_lossy(), char::REPLACEMENT_CHARACTER.to_string());
293338
}
294339

295340
#[test]
@@ -472,4 +517,70 @@ mod tests {
472517
let kargs = Cmdline::from(&invalid_utf8);
473518
assert!(kargs.require_value_of_utf8("invalid").is_err());
474519
}
520+
521+
#[test]
522+
fn test_find_str() {
523+
let kargs = Cmdline::from(b"foo=bar baz=qux switch rd.break".as_slice());
524+
let p = kargs.find_str("foo").unwrap();
525+
assert_eq!(p, ParameterStr::from("foo=bar"));
526+
assert_eq!(p.as_ref(), "foo=bar");
527+
let p = kargs.find_str("rd.break").unwrap();
528+
assert_eq!(p, ParameterStr::from("rd.break"));
529+
assert!(kargs.find_str("missing").is_none());
530+
}
531+
532+
#[test]
533+
fn test_find_all_str() {
534+
let kargs =
535+
Cmdline::from(b"foo=bar rd.foo=a rd.bar=b rd.baz rd.qux=c notrd.val=d".as_slice());
536+
let mut rd_args: Vec<_> = kargs.find_all_starting_with_str("rd.").collect();
537+
rd_args.sort_by(|a, b| a.key.0.cmp(b.key.0));
538+
assert_eq!(rd_args.len(), 4);
539+
assert_eq!(rd_args[0], ParameterStr::from("rd.bar=b"));
540+
assert_eq!(rd_args[1], ParameterStr::from("rd.baz"));
541+
assert_eq!(rd_args[2], ParameterStr::from("rd.foo=a"));
542+
assert_eq!(rd_args[3], ParameterStr::from("rd.qux=c"));
543+
}
544+
545+
#[test]
546+
fn test_param_to_str() {
547+
let p = Parameter::from("foo=bar");
548+
let p_str = p.to_str().unwrap();
549+
assert_eq!(p_str, ParameterStr::from("foo=bar"));
550+
let non_utf8_byte = b"\xff";
551+
let mut p_u8 = b"foo=".to_vec();
552+
p_u8.push(non_utf8_byte[0]);
553+
let p = Parameter::from(&p_u8);
554+
assert!(p.to_str().is_none());
555+
}
556+
557+
#[test]
558+
fn test_param_key_str_eq() {
559+
let k1 = ParameterKeyStr("a-b");
560+
let k2 = ParameterKeyStr("a_b");
561+
assert_eq!(k1, k2);
562+
let k1 = ParameterKeyStr("a-b");
563+
let k2 = ParameterKeyStr("a-c");
564+
assert_ne!(k1, k2);
565+
}
566+
567+
#[test]
568+
fn test_kargs_non_utf8() {
569+
let non_utf8_val = b"an_invalid_key=\xff";
570+
let mut kargs_bytes = b"foo=bar ".to_vec();
571+
kargs_bytes.extend_from_slice(non_utf8_val);
572+
kargs_bytes.extend_from_slice(b" baz=qux");
573+
let kargs = Cmdline::from(kargs_bytes.as_slice());
574+
575+
// We should be able to find the valid kargs
576+
assert_eq!(kargs.find_str("foo").unwrap().value, Some("bar"));
577+
assert_eq!(kargs.find_str("baz").unwrap().value, Some("qux"));
578+
579+
// But we should not find the invalid one via find_str
580+
assert!(kargs.find("an_invalid_key").unwrap().to_str().is_none());
581+
582+
// And even using the raw find, trying to convert it to_str will fail.
583+
let raw_param = kargs.find("an_invalid_key").unwrap();
584+
assert_eq!(raw_param.value.unwrap(), b"\xff");
585+
}
475586
}

0 commit comments

Comments
 (0)