Skip to content

Commit a0848bf

Browse files
splitstream: remove special handling of padding
Let's just have users write the padding as a separate inline section after they write the external data. This makes things a lot easier and reduces thrashing of the internal buffer.

Signed-off-by: Allison Karlitskaya <[email protected]>
1 parent dc8d7dc commit a0848bf

File tree

2 files changed

+20
-28
lines changed

2 files changed

+20
-28
lines changed

crates/composefs-oci/src/tar.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@ pub fn split(
7373
tar_stream.read_exact(&mut buffer)?;
7474

7575
if header.entry_type() == EntryType::Regular && actual_size > INLINE_CONTENT_MAX {
76-
// non-empty regular file: store the data in the object store
77-
let padding = buffer.split_off(actual_size);
78-
writer.write_external(&buffer, padding)?;
76+
// non-empty regular file: store the data external and the trailing padding inline
77+
writer.write_external(&buffer[..actual_size])?;
78+
writer.write_inline(&buffer[actual_size..]);
7979
} else {
8080
// else: store the data inline in the split stream
8181
writer.write_inline(&buffer);
@@ -112,7 +112,8 @@ pub async fn split_async(
112112
if header.entry_type() == EntryType::Regular && actual_size > INLINE_CONTENT_MAX {
113113
// non-empty regular file: store the data in the object store
114114
let padding = buffer.split_off(actual_size);
115-
writer.write_external_async(buffer, padding).await?;
115+
writer.write_external_async(buffer).await?;
116+
writer.write_inline(&padding);
116117
} else {
117118
// else: store the data inline in the split stream
118119
writer.write_inline(&buffer);

crates/composefs/src/splitstream.rs

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -137,71 +137,62 @@ impl<ObjectID: FsVerityHashValue> SplitStreamWriter<ObjectID> {
137137
Ok(writer.write_all(data)?)
138138
}
139139

140-
/// flush any buffered inline data, taking new_value as the new value of the buffer
141-
fn flush_inline(&mut self, new_value: Vec<u8>) -> Result<()> {
140+
fn flush_inline(&mut self) -> Result<()> {
142141
if !self.inline_content.is_empty() {
143142
Self::write_fragment(
144143
&mut self.writer,
145144
self.inline_content.len(),
146145
&self.inline_content,
147146
)?;
148-
self.inline_content = new_value;
147+
self.inline_content.clear();
149148
}
150149
Ok(())
151150
}
152151

153-
/// really, "add inline content to the buffer"
154-
/// you need to call .flush_inline() later
152+
/// Write inline data to the stream.
155153
pub fn write_inline(&mut self, data: &[u8]) {
156154
if let Some((ref mut sha256, ..)) = self.sha256 {
157155
sha256.update(data);
158156
}
159157
self.inline_content.extend(data);
160158
}
161159

162-
/// write a reference to external data to the stream. If the external data had padding in the
163-
/// stream which is not stored in the object then pass it here as well and it will be stored
164-
/// inline after the reference.
165-
fn write_reference(&mut self, reference: &ObjectID, padding: Vec<u8>) -> Result<()> {
166-
// Flush the inline data before we store the external reference. Any padding from the
167-
// external data becomes the start of a new inline block.
168-
self.flush_inline(padding)?;
160+
// common part of .write_external() and .write_external_async()
161+
fn write_reference(&mut self, id: ObjectID) -> Result<()> {
162+
// Flush any buffered inline data before we store the external reference.
163+
self.flush_inline()?;
169164

170-
Self::write_fragment(&mut self.writer, 0, reference.as_bytes())
165+
Self::write_fragment(&mut self.writer, 0, id.as_bytes())
171166
}
172167

173-
/// Writes data as an external object reference with optional padding.
168+
/// Write externally-split data to the stream.
174169
///
175170
/// The data is stored in the repository and a reference is written to the stream.
176-
/// Any padding bytes are stored inline after the reference.
177-
pub fn write_external(&mut self, data: &[u8], padding: Vec<u8>) -> Result<()> {
171+
pub fn write_external(&mut self, data: &[u8]) -> Result<()> {
178172
if let Some((ref mut sha256, ..)) = self.sha256 {
179173
sha256.update(data);
180-
sha256.update(&padding);
181174
}
182175
let id = self.repo.ensure_object(data)?;
183-
self.write_reference(&id, padding)
176+
self.write_reference(id)
184177
}
185178

186-
/// Asynchronously writes data as an external object reference with optional padding.
179+
/// Asynchronously write externally-split data to the stream.
187180
///
188181
/// The data is stored in the repository asynchronously and a reference is written to the stream.
189-
/// Any padding bytes are stored inline after the reference.
190-
pub async fn write_external_async(&mut self, data: Vec<u8>, padding: Vec<u8>) -> Result<()> {
182+
pub async fn write_external_async(&mut self, data: Vec<u8>) -> Result<()> {
191183
if let Some((ref mut sha256, ..)) = self.sha256 {
192184
sha256.update(&data);
193-
sha256.update(&padding);
194185
}
195186
let id = self.repo.ensure_object_async(data).await?;
196-
self.write_reference(&id, padding)
187+
self.write_reference(id)
197188
}
198189

199190
/// Finalizes the split stream and returns its object ID.
200191
///
201192
/// Flushes any remaining inline content, validates the SHA256 hash if provided,
202193
/// and stores the compressed stream in the repository.
203194
pub fn done(mut self) -> Result<ObjectID> {
204-
self.flush_inline(vec![])?;
195+
self.flush_inline()?;
205196

206197
if let Some((context, expected)) = self.sha256 {
207198
if Into::<Sha256Digest>::into(context.finalize()) != expected {

0 commit comments

Comments
 (0)