Skip to content

Commit 15d7e28

Browse files
committed
kernel_cmdline: Add canonicalized() methods
This seems generally useful. In particular, bootc could always canonicalize the cmdline it generates, which would remove a source of potential drift/nondeterminism and better enable reproducible builds. Signed-off-by: John Eckersberg <[email protected]>
1 parent 7e81f9c commit 15d7e28

File tree

2 files changed

+169
-0
lines changed

2 files changed

+169
-0
lines changed

crates/kernel_cmdline/src/bytes.rs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,31 @@ impl<'a> Cmdline<'a> {
340340
removed
341341
}
342342

343+
/// Returns the canonicalized version of the `Cmdline`.
344+
///
345+
/// This:
346+
///
347+
/// 1. Sorts the parameter list
348+
/// 2. Canonicalizes each `Parameter`
349+
/// 3. Joins each parameter together with a single space ' '
350+
///
351+
/// # Examples
352+
///
353+
/// ```
354+
/// use bootc_kernel_cmdline::bytes::Cmdline;
355+
///
356+
/// let cmdline = Cmdline::from(b"z a=\"b c\"");
357+
/// assert_eq!(&cmdline.canonicalized(), b"\"a=b c\" z");
358+
/// ```
359+
pub fn canonicalized(&self) -> Vec<u8> {
360+
let mut params = self.iter().collect::<Vec<_>>();
361+
params.sort();
362+
363+
let canonicalized_params: Vec<Vec<u8>> = params.iter().map(|p| p.canonicalized()).collect();
364+
365+
canonicalized_params.join(&b' ')
366+
}
367+
343368
#[cfg(test)]
344369
pub(crate) fn is_owned(&self) -> bool {
345370
matches!(self.0, Cow::Owned(_))
@@ -426,6 +451,21 @@ impl ParameterKey<'_> {
426451
.iter()
427452
.map(|&c: &u8| if c == b'-' { b'_' } else { c })
428453
}
454+
455+
/// Returns the canonicalized version of the key. This replaces
456+
/// all dashes '-' with underscores '_'.
457+
///
458+
/// # Example
459+
///
460+
/// ```
461+
/// use bootc_kernel_cmdline::bytes::ParameterKey;
462+
///
463+
/// assert_eq!(&ParameterKey::from("key-with-dashes").canonicalized(),
464+
/// "key_with_dashes".as_bytes());
465+
/// ```
466+
pub fn canonicalized(&self) -> Vec<u8> {
467+
self.iter().collect()
468+
}
429469
}
430470

431471
impl PartialEq for ParameterKey<'_> {
@@ -528,6 +568,58 @@ impl<'a> Parameter<'a> {
528568
pub fn value(&self) -> Option<&'a [u8]> {
529569
self.value
530570
}
571+
572+
/// Returns the canonical representation of the parameter.
573+
///
574+
/// The canonical representation:
575+
///
576+
/// 1. Will use the canonicalized form of the key via
577+
/// `ParameterKey::canonicalized`
578+
///
579+
/// 2. Will be "externally" quoted if either the key or
580+
/// (optional) value contains ascii whitespace.
581+
///
582+
/// 3. Unnecessary quoting will be removed.
583+
///
584+
/// # Examples
585+
///
586+
/// ```
587+
/// use bootc_kernel_cmdline::bytes::Parameter;
588+
///
589+
/// // key is canonicalized
590+
/// assert_eq!(Parameter::parse("a-dashed-key").unwrap().canonicalized(),
591+
/// "a_dashed_key".as_bytes());
592+
///
593+
/// // quotes are externally added if needed
594+
/// assert_eq!(Parameter::parse("foo=\"has some spaces\"").unwrap().canonicalized(),
595+
/// "\"foo=has some spaces\"".as_bytes());
596+
///
597+
/// // unnecessary quotes are removed
598+
/// assert_eq!(Parameter::parse("foo=\"bar\"").unwrap().canonicalized(),
599+
/// "foo=bar".as_bytes());
600+
/// ```
601+
pub fn canonicalized(&self) -> Vec<u8> {
602+
let key = self.key.canonicalized();
603+
let spaces = key.iter().any(|b| b.is_ascii_whitespace())
604+
|| self
605+
.value
606+
.map_or(false, |val| val.iter().any(|b| b.is_ascii_whitespace()));
607+
608+
let mut res = if spaces { vec![b'"'] } else { vec![] };
609+
610+
res.extend(&key);
611+
612+
if let Some(val) = self.value {
613+
res.push(b'=');
614+
res.extend(val);
615+
}
616+
617+
if spaces {
618+
res.push(b'"');
619+
}
620+
621+
res
622+
}
531623
}
532624

533625
impl PartialEq for Parameter<'_> {

crates/kernel_cmdline/src/utf8.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,29 @@ impl<'a> Cmdline<'a> {
203203
self.0.remove_exact(&param.0)
204204
}
205205

206+
/// Returns the canonicalized version of the `Cmdline`.
207+
///
208+
/// This:
209+
///
210+
/// 1. Sorts the parameter list
211+
/// 2. Canonicalizes each `Parameter`
212+
/// 3. Joins each parameter together with a single space ' '
213+
///
214+
/// # Examples
215+
///
216+
/// ```
217+
/// use bootc_kernel_cmdline::utf8::Cmdline;
218+
///
219+
/// let cmdline = Cmdline::from("z a=\"b c\"");
220+
/// assert_eq!(&cmdline.canonicalized(), "\"a=b c\" z");
221+
/// ```
222+
pub fn canonicalized(&self) -> String {
223+
self.0
224+
.canonicalized()
225+
.try_into()
226+
.expect("We only construct the underlying bytes from valid UTF-8")
227+
}
228+
206229
#[cfg(test)]
207230
pub(crate) fn is_owned(&self) -> bool {
208231
self.0.is_owned()
@@ -298,6 +321,24 @@ impl<'a> ParameterKey<'a> {
298321
fn from_bytes(input: bytes::ParameterKey<'a>) -> Self {
299322
Self(input)
300323
}
324+
325+
/// Returns the canonicalized version of the key. This replaces
326+
/// all dashes '-' with underscores '_'.
327+
///
328+
/// # Example
329+
///
330+
/// ```
331+
/// use bootc_kernel_cmdline::utf8::ParameterKey;
332+
///
333+
/// assert_eq!(ParameterKey::from("key-with-dashes").canonicalized(),
334+
/// "key_with_dashes".to_string());
335+
/// ```
336+
pub fn canonicalized(&self) -> String {
337+
self.0
338+
.canonicalized()
339+
.try_into()
340+
.expect("We only construct the underlying bytes from valid UTF-8")
341+
}
301342
}
302343

303344
impl<'a, T: AsRef<str> + ?Sized> From<&'a T> for ParameterKey<'a> {
@@ -358,6 +399,42 @@ impl<'a> Parameter<'a> {
358399
str::from_utf8(p).expect("We only construct the underlying bytes from valid UTF-8")
359400
})
360401
}
402+
403+
/// Returns the canonical representation of the parameter.
404+
///
405+
/// The canonical representation:
406+
///
407+
/// 1. Will use the canonicalized form of the key via
408+
/// `ParameterKey::canonicalized`
409+
///
410+
/// 2. Will be "externally" quoted if either the key or
411+
/// (optional) value contains ascii whitespace.
412+
///
413+
/// 3. Unnecessary quoting will be removed.
414+
///
415+
/// # Examples
416+
///
417+
/// ```
418+
/// use bootc_kernel_cmdline::utf8::Parameter;
419+
///
420+
/// // key is canonicalized
421+
/// assert_eq!(Parameter::parse("a-dashed-key").unwrap().canonicalized(),
422+
/// "a_dashed_key".to_string());
423+
///
424+
/// // quotes are externally added if needed
425+
/// assert_eq!(Parameter::parse("foo=\"has some spaces\"").unwrap().canonicalized(),
426+
/// "\"foo=has some spaces\"".to_string());
427+
///
428+
/// // unnecessary quotes are removed
429+
/// assert_eq!(Parameter::parse("foo=\"bar\"").unwrap().canonicalized(),
430+
/// "foo=bar".to_string());
431+
/// ```
432+
pub fn canonicalized(&self) -> String {
433+
self.0
434+
.canonicalized()
435+
.try_into()
436+
.expect("We only construct the underlying bytes from valid UTF-8")
437+
}
361438
}
362439

363440
impl<'a> TryFrom<bytes::Parameter<'a>> for Parameter<'a> {

0 commit comments

Comments
 (0)