Skip to content

Commit 616b617

Browse files
committed
tar: Create a new xattrs file for each checksum
This removes the code from import/export that links duplicate xattrs to a single xattrs file. This is to avoid creating too many (over 65,000) hardlinks to a single file. This will likely cause some image sizes to be increased, however compression should limit the size increase. Assisted-by: Claude Code Signed-off-by: ckyrouac <[email protected]>
1 parent fbd06e8 commit 616b617

File tree

2 files changed

+41
-128
lines changed

2 files changed

+41
-128
lines changed

crates/ostree-ext/src/tar/export.rs

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ struct OstreeTarWriter<'a, W: std::io::Write> {
125125
wrote_dirtree: HashSet<String>,
126126
wrote_dirmeta: HashSet<String>,
127127
wrote_content: HashSet<String>,
128-
wrote_xattrs: HashSet<String>,
129128
}
130129

131130
pub(crate) fn object_path(objtype: ostree::ObjectType, checksum: &str) -> Utf8PathBuf {
@@ -146,11 +145,6 @@ fn v1_xattrs_object_path(checksum: &str) -> Utf8PathBuf {
146145
format!("{OSTREEDIR}/repo/objects/{first}/{rest}.file-xattrs").into()
147146
}
148147

149-
fn v1_xattrs_link_object_path(checksum: &str) -> Utf8PathBuf {
150-
let (first, rest) = checksum.split_at(2);
151-
format!("{OSTREEDIR}/repo/objects/{first}/{rest}.file-xattrs-link").into()
152-
}
153-
154148
/// Check for "denormal" symlinks which contain "//"
155149
// See https://github.com/fedora-sysv/chkconfig/pull/67
156150
// [root@cosa-devsh ~]# rpm -qf /usr/lib/systemd/systemd-sysv-install
@@ -196,7 +190,6 @@ impl<'a, W: std::io::Write> OstreeTarWriter<'a, W> {
196190
wrote_dirmeta: HashSet::new(),
197191
wrote_dirtree: HashSet::new(),
198192
wrote_content: HashSet::new(),
199-
wrote_xattrs: HashSet::new(),
200193
};
201194
Ok(r)
202195
}
@@ -225,18 +218,6 @@ impl<'a, W: std::io::Write> OstreeTarWriter<'a, W> {
225218
tar_append_default_data(self.out, path, buf)
226219
}
227220

228-
/// Add an hardlink entry with default permissions (root/root 0644)
229-
fn append_default_hardlink(&mut self, path: &Utf8Path, link_target: &Utf8Path) -> Result<()> {
230-
let mut h = tar::Header::new_gnu();
231-
h.set_entry_type(tar::EntryType::Link);
232-
h.set_uid(0);
233-
h.set_gid(0);
234-
h.set_mode(0o644);
235-
h.set_size(0);
236-
self.out.append_link(&mut h, path, link_target)?;
237-
Ok(())
238-
}
239-
240221
/// Write the initial /sysroot/ostree/repo structure.
241222
fn write_repo_structure(&mut self) -> Result<()> {
242223
if self.wrote_initdirs {
@@ -386,24 +367,8 @@ impl<'a, W: std::io::Write> OstreeTarWriter<'a, W> {
386367
let xattrs_data = xattrs.data_as_bytes();
387368
let xattrs_data = xattrs_data.as_ref();
388369

389-
let xattrs_checksum = {
390-
let digest = openssl::hash::hash(openssl::hash::MessageDigest::sha256(), xattrs_data)?;
391-
hex::encode(digest)
392-
};
393-
394-
let path = v1_xattrs_object_path(&xattrs_checksum);
395-
// Write xattrs content into a separate `.file-xattrs` object.
396-
if !self.wrote_xattrs.contains(&xattrs_checksum) {
397-
let inserted = self.wrote_xattrs.insert(xattrs_checksum);
398-
debug_assert!(inserted);
399-
self.append_default_data(&path, xattrs_data)?;
400-
}
401-
// Write a `.file-xattrs-link` which links the file object to
402-
// the corresponding detached xattrs.
403-
{
404-
let link_obj_path = v1_xattrs_link_object_path(checksum);
405-
self.append_default_hardlink(&link_obj_path, &path)?;
406-
}
370+
let path = v1_xattrs_object_path(&checksum);
371+
self.append_default_data(&path, xattrs_data)?;
407372

408373
Ok(true)
409374
}
@@ -939,12 +904,4 @@ mod tests {
939904
let output = v1_xattrs_object_path(checksum);
940905
assert_eq!(&output, expected);
941906
}
942-
943-
#[test]
944-
fn test_v1_xattrs_link_object_path() {
945-
let checksum = "b8627e3ef0f255a322d2bd9610cfaaacc8f122b7f8d17c0e7e3caafa160f9fc7";
946-
let expected = "sysroot/ostree/repo/objects/b8/627e3ef0f255a322d2bd9610cfaaacc8f122b7f8d17c0e7e3caafa160f9fc7.file-xattrs-link";
947-
let output = v1_xattrs_link_object_path(checksum);
948-
assert_eq!(&output, expected);
949-
}
950907
}

crates/ostree-ext/src/tar/import.rs

Lines changed: 39 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,10 @@ pub(crate) struct Importer {
4848
repo: ostree::Repo,
4949
remote: Option<String>,
5050
verify_text: Option<String>,
51-
// Cache of xattrs, keyed by their content checksum.
51+
// Cache of xattrs, keyed by file checksum.
5252
xattrs: HashMap<String, glib::Variant>,
53-
// Reusable buffer for xattrs references. It maps a file checksum (.0)
54-
// to an xattrs checksum (.1) in the `xattrs` cache above.
55-
next_xattrs: Option<(String, String)>,
53+
// The next expected file checksum that should use cached xattrs.
54+
next_xattrs: Option<String>,
5655

5756
// Reusable buffer for reads. See also https://github.com/rust-lang/rust/issues/78485
5857
buf: Vec<u8>,
@@ -156,12 +155,6 @@ fn parse_checksum(parent: &str, name: &Utf8Path) -> Result<String> {
156155
validate_sha256(reassembled)
157156
}
158157

159-
/// Parse a `.file-xattrs-link` link target into the corresponding checksum.
160-
fn parse_xattrs_link_target(path: &Utf8Path) -> Result<String> {
161-
let (parent, rest, _objtype) = parse_object_entry_path(path)?;
162-
parse_checksum(parent, rest)
163-
}
164-
165158
impl Importer {
166159
/// Create an importer which will import an OSTree commit object.
167160
pub(crate) fn new_for_commit(repo: &ostree::Repo, remote: Option<String>) -> Self {
@@ -360,7 +353,7 @@ impl Importer {
360353
let size: usize = entry.header().size()?.try_into()?;
361354

362355
// Pop the queued xattrs reference.
363-
let (file_csum, xattrs_csum) = self
356+
let file_csum = self
364357
.next_xattrs
365358
.take()
366359
.ok_or_else(|| anyhow!("Missing xattrs reference"))?;
@@ -375,12 +368,12 @@ impl Importer {
375368
return Ok(());
376369
}
377370

378-
// Retrieve xattrs content from the cache.
371+
// Retrieve xattrs content from the cache (now keyed by file checksum).
379372
let xattrs = self
380373
.xattrs
381-
.get(&xattrs_csum)
374+
.get(checksum)
382375
.cloned()
383-
.ok_or_else(|| anyhow!("Failed to find xattrs content {}", xattrs_csum,))?;
376+
.ok_or_else(|| anyhow!("Failed to find xattrs content for {}", checksum))?;
384377

385378
match entry.header().entry_type() {
386379
tar::EntryType::Regular => {
@@ -423,7 +416,6 @@ impl Importer {
423416
Ok(())
424417
}
425418
"file-xattrs" => self.process_file_xattrs(entry, checksum),
426-
"file-xattrs-link" => self.process_file_xattrs_link(entry, checksum),
427419
"xattrs" => self.process_xattr_ref(entry, checksum),
428420
kind => {
429421
let objtype = objtype_from_string(kind)
@@ -447,50 +439,36 @@ impl Importer {
447439
#[context("Processing file xattrs")]
448440
fn process_file_xattrs(
449441
&mut self,
450-
entry: tar::Entry<impl std::io::Read>,
451-
checksum: String,
452-
) -> Result<()> {
453-
self.cache_xattrs_content(entry, Some(checksum))?;
454-
Ok(())
455-
}
456-
457-
/// Process a `.file-xattrs-link` object (v1).
458-
///
459-
/// This is an hardlink that contains extended attributes for a content object.
460-
/// When the max hardlink count is reached, this object may also be encoded as
461-
/// a regular file instead.
462-
#[context("Processing xattrs link")]
463-
fn process_file_xattrs_link(
464-
&mut self,
465-
entry: tar::Entry<impl std::io::Read>,
442+
mut entry: tar::Entry<impl std::io::Read>,
466443
checksum: String,
467444
) -> Result<()> {
468-
use tar::EntryType::{Link, Regular};
469-
if let Some(prev) = &self.next_xattrs {
470-
bail!(
471-
"Found previous dangling xattrs for file object '{}'",
472-
prev.0
473-
);
445+
// Read the xattrs content directly and associate with the file checksum
446+
let header = entry.header();
447+
if header.entry_type() != tar::EntryType::Regular {
448+
return Err(anyhow!(
449+
"Invalid xattr entry of type {:?}",
450+
header.entry_type()
451+
));
452+
}
453+
let size = header.size()?;
454+
if size > MAX_XATTR_SIZE as u64 {
455+
return Err(anyhow!(
456+
"xattrs object of size {} exceeds {} bytes",
457+
size,
458+
MAX_XATTR_SIZE
459+
));
474460
}
475461

476-
// Extract the xattrs checksum from the link target or from the content (v1).
477-
// Later, it will be used as the key for a lookup into the `self.xattrs` cache.
478-
let xattrs_checksum = match entry.header().entry_type() {
479-
Link => {
480-
let link_target = entry
481-
.link_name()?
482-
.ok_or_else(|| anyhow!("No xattrs link content for {}", checksum))?;
483-
let xattr_target = Utf8Path::from_path(&link_target)
484-
.ok_or_else(|| anyhow!("Invalid non-UTF8 xattrs link {}", checksum))?;
485-
parse_xattrs_link_target(xattr_target)?
486-
}
487-
Regular => self.cache_xattrs_content(entry, None)?,
488-
x => bail!("Unexpected xattrs type '{:?}' found for {}", x, checksum),
489-
};
462+
let mut contents = vec![0u8; size as usize];
463+
entry.read_exact(contents.as_mut_slice())?;
464+
let data: glib::Bytes = contents.as_slice().into();
465+
466+
// Parse as the correct xattrs variant type
467+
let v = Variant::from_bytes::<&[(&[u8], &[u8])]>(&data);
490468

491-
// Now xattrs are properly cached for the next content object in the stream,
492-
// which should match `checksum`.
493-
self.next_xattrs = Some((checksum, xattrs_checksum));
469+
// Store xattrs directly keyed by file checksum
470+
self.xattrs.insert(checksum.clone(), v);
471+
self.next_xattrs = Some(checksum);
494472

495473
Ok(())
496474
}
@@ -505,10 +483,7 @@ impl Importer {
505483
target: String,
506484
) -> Result<()> {
507485
if let Some(prev) = &self.next_xattrs {
508-
bail!(
509-
"Found previous dangling xattrs for file object '{}'",
510-
prev.0
511-
);
486+
bail!("Found previous dangling xattrs for file object '{}'", prev);
512487
}
513488

514489
// Parse the xattrs checksum from the link target (v0).
@@ -528,9 +503,12 @@ impl Importer {
528503
.to_string();
529504
let xattrs_checksum = validate_sha256(xattr_target)?;
530505

531-
// Now xattrs are properly cached for the next content object in the stream,
532-
// which should match `checksum`.
533-
self.next_xattrs = Some((target, xattrs_checksum));
506+
// Cache the xattrs for this target file using its checksum from the v0 format
507+
// We need to look up the xattrs by the xattrs_checksum and store it by target
508+
if let Some(xattrs) = self.xattrs.get(&xattrs_checksum).cloned() {
509+
self.xattrs.insert(target.clone(), xattrs);
510+
}
511+
self.next_xattrs = Some(target);
534512

535513
Ok(())
536514
}
@@ -915,26 +893,4 @@ mod tests {
915893
let output = parse_checksum(parent, &Utf8PathBuf::from(name)).unwrap();
916894
assert_eq!(output, expected);
917895
}
918-
919-
#[test]
920-
fn test_parse_xattrs_link_target() {
921-
let err_cases = &[
922-
"",
923-
"b8627e3ef0f255a322d2bd9610cfaaacc8f122b7f8d17c0e7e3caafa160f9fc7.file-xattrs",
924-
"../b8/62.file-xattrs",
925-
];
926-
for input in err_cases {
927-
parse_xattrs_link_target(Utf8Path::new(input)).unwrap_err();
928-
}
929-
930-
let ok_cases = &[
931-
"../b8/627e3ef0f255a322d2bd9610cfaaacc8f122b7f8d17c0e7e3caafa160f9fc7.file-xattrs",
932-
"sysroot/ostree/repo/objects/b8/627e3ef0f255a322d2bd9610cfaaacc8f122b7f8d17c0e7e3caafa160f9fc7.file-xattrs",
933-
];
934-
let expected = "b8627e3ef0f255a322d2bd9610cfaaacc8f122b7f8d17c0e7e3caafa160f9fc7";
935-
for input in ok_cases {
936-
let output = parse_xattrs_link_target(Utf8Path::new(input)).unwrap();
937-
assert_eq!(output, expected);
938-
}
939-
}
940896
}

0 commit comments

Comments
 (0)