Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiple fixes for journaling with fast boot and resumption #4779

Merged
merged 18 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
ed397fe
Now using explicit file descriptors when opening and creating files f…
john-sharratt Jun 1, 2024
002fab0
Now using deterministic inode generation
john-sharratt Jun 1, 2024
1ef6b55
Switched over to a faster hash for the deterministic inodes
john-sharratt Jun 1, 2024
47741a2
Compiling xxhash with optimizations when in debug mode as its on the …
john-sharratt Jun 1, 2024
4937d1d
Fixed a bug on accessible area not being preserved on memory with a b…
john-sharratt Jun 1, 2024
75b7480
Reduced the amount of logging so it's easier to debug
john-sharratt Jun 1, 2024
304e845
Added more debug info to test a theory
john-sharratt Jun 1, 2024
10bf445
Merge remote-tracking branch 'origin/main' into fix-for-compacted-jou…
john-sharratt Jun 1, 2024
84c52f3
Deleted directories now erase the events related to their creation
john-sharratt Jun 2, 2024
2166283
Only the last change directory event is retained
john-sharratt Jun 2, 2024
bc08635
Compacting will now remove transient events such as local disk redo l…
john-sharratt Jun 2, 2024
32618ef
Fixed a bug where the order of syscalls determined if the modified da…
john-sharratt Jun 2, 2024
7be262e
Removed the debugging code
john-sharratt Jun 2, 2024
f6176d8
Merge remote-tracking branch 'origin/main' into fix-for-compacted-jou…
john-sharratt Jun 3, 2024
746d0ea
Fixed a lint and compile error on the JS code
john-sharratt Jun 3, 2024
30eaad0
Fix for compile error
john-sharratt Jun 3, 2024
32038ea
Fixes for clippy
john-sharratt Jun 3, 2024
98d44ae
Linting fix
john-sharratt Jun 4, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ opt-level = 3
opt-level = 3
[profile.dev.package.sha2]
opt-level = 3
[profile.dev.package.xxhash-rust]
opt-level = 3
[profile.dev.package.digest]
opt-level = 3

Expand Down
185 changes: 165 additions & 20 deletions lib/journal/src/concrete/compacting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,19 @@ struct State {
snapshots: Vec<usize>,
// Last tty event that's been set
tty: Option<usize>,
// The last change directory event
chdir: Option<usize>,
// Events that create a particular directory
create_directory: HashMap<String, usize>,
create_directory: HashMap<String, DescriptorLookup>,
// Events that remove a particular directory
remove_directory: HashMap<String, usize>,
// When creating and truncating a file we have a special
// lookup so that duplicates can be erased
create_trunc_file: HashMap<String, Fd>,
// When modifying an existing file
modify_file: HashMap<String, Fd>,
// Events that unlink a file
unlink_file: HashMap<String, usize>,
// Thread events are only maintained while the thread and the
// process are still running
thread_map: HashMap<u32, usize>,
Expand Down Expand Up @@ -99,6 +105,9 @@ impl State {
if let Some(tty) = self.tty.as_ref() {
filter.add_event_to_whitelist(*tty);
}
if let Some(tty) = self.chdir.as_ref() {
filter.add_event_to_whitelist(*tty);
}
for e in self.snapshots.iter() {
filter.add_event_to_whitelist(*e);
}
Expand All @@ -108,12 +117,22 @@ impl State {
for t in self.thread_map.iter() {
filter.add_event_to_whitelist(*t.1);
}
for (_, e) in self.create_directory.iter() {
for (_, e) in self.remove_directory.iter() {
filter.add_event_to_whitelist(*e);
}
for (_, e) in self.remove_directory.iter() {
for (_, e) in self.unlink_file.iter() {
filter.add_event_to_whitelist(*e);
}
for (_, l) in self.create_directory.iter() {
if let Some(d) = self.descriptors.get(l) {
for e in d.events.iter() {
filter.add_event_to_whitelist(*e);
}
for e in d.write_map.values() {
filter.add_event_to_whitelist(*e);
}
}
}
for (_, l) in self
.suspect_descriptors
.iter()
Expand Down Expand Up @@ -203,6 +222,7 @@ impl CompactingJournal {
inner_tx: tx,
inner_rx: rx.as_restarted()?,
tty: None,
chdir: None,
snapshots: Default::default(),
memory_map: Default::default(),
thread_map: Default::default(),
Expand All @@ -211,6 +231,8 @@ impl CompactingJournal {
create_directory: Default::default(),
remove_directory: Default::default(),
create_trunc_file: Default::default(),
modify_file: Default::default(),
unlink_file: Default::default(),
suspect_descriptors: Default::default(),
keep_descriptors: Default::default(),
stdio_descriptors: Default::default(),
Expand Down Expand Up @@ -367,6 +389,9 @@ impl WritableJournal for CompactingJournalTx {
JournalEntry::TtySetV1 { .. } => {
state.tty.replace(event_index);
}
JournalEntry::ChangeDirectoryV1 { .. } => {
state.chdir.replace(event_index);
}
JournalEntry::OpenFileDescriptorV1 {
fd, o_flags, path, ..
} => {
Expand Down Expand Up @@ -396,13 +421,42 @@ impl WritableJournal for CompactingJournalTx {

// Creating a file and erasing anything that was there before means
// the entire create branch that exists before this one can be ignored
if o_flags.contains(wasi::Oflags::CREATE) && o_flags.contains(wasi::Oflags::TRUNC) {
let path = path.to_string();
let path = path.to_string();
if o_flags.contains(wasi::Oflags::CREATE)
&& (o_flags.contains(wasi::Oflags::TRUNC)
|| o_flags.contains(wasi::Oflags::EXCL))
{
if let Some(existing) = state.create_trunc_file.remove(&path) {
state.suspect_descriptors.remove(&existing);
state.keep_descriptors.remove(&existing);
if let Some(remove) = state.suspect_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
if let Some(remove) = state.keep_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
}
if let Some(existing) = state.modify_file.remove(&path) {
if let Some(remove) = state.suspect_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
if let Some(remove) = state.keep_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
}
if let Some(existing) = state.create_trunc_file.insert(path, *fd) {
if let Some(remove) = state.suspect_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
if let Some(remove) = state.keep_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
}
} else if let Some(existing) = state.modify_file.insert(path, *fd) {
if let Some(remove) = state.suspect_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
if let Some(remove) = state.keep_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
state.create_trunc_file.insert(path, *fd);
}
}
// We keep non-mutable events for file descriptors that are suspect
Expand All @@ -411,29 +465,38 @@ impl WritableJournal for CompactingJournalTx {
// Get the lookup
// (if its suspect then it will remove the entry and
// thus the entire branch of events it represents is discarded)
let mut skip = false;
let mut erase = false;
let lookup = if matches!(&entry, JournalEntry::CloseFileDescriptorV1 { .. }) {
if state.open_sockets.remove(fd).is_some() {
skip = true;
erase = true;
}
if state.open_pipes.remove(fd).is_some() {
skip = true;
erase = true;
}
match state.suspect_descriptors.remove(fd) {
Some(a) => {
erase = true;
Some(a)
}
None => None,
}
state.suspect_descriptors.remove(fd)
} else {
state.suspect_descriptors.get(fd).cloned()
};
let lookup = lookup
.or_else(|| state.keep_descriptors.get(fd).cloned())
.or_else(|| state.stdio_descriptors.get(fd).cloned());

if !skip {
// If we are to erase all these events as if they never happened then do so
if erase {
if let Some(lookup) = lookup {
let state = state.descriptors.entry(lookup).or_default();
state.events.push(event_index);
} else {
state.whitelist.insert(event_index);
state.descriptors.remove(&lookup);
}
} else if let Some(lookup) = lookup {
let state = state.descriptors.entry(lookup).or_default();
state.events.push(event_index);
} else {
state.whitelist.insert(event_index);
}
}
// Things that modify a file descriptor mean that it is
Expand Down Expand Up @@ -514,14 +577,96 @@ impl WritableJournal for CompactingJournalTx {
// Creating a new directory only needs to be done once
JournalEntry::CreateDirectoryV1 { path, .. } => {
let path = path.to_string();
state.remove_directory.remove(&path);
state.create_directory.entry(path).or_insert(event_index);

// Newly created directories are stored as a set of events.
let lookup = match state.create_directory.get(&path) {
Some(lookup) => *lookup,
None => {
let lookup = DescriptorLookup(state.descriptor_seed);
state.descriptor_seed += 1;
state.create_directory.insert(path, lookup);
lookup
}
};

// Add the event that creates the directory
state
.descriptors
.entry(lookup)
.or_default()
.events
.push(event_index);
}
// Deleting a directory only needs to be done once
JournalEntry::RemoveDirectoryV1 { path, .. } => {
let path = path.to_string();
state.create_directory.remove(&path);
state.remove_directory.entry(path).or_insert(event_index);
state.remove_directory.insert(path, event_index);
}
// Unlinks the file from the file system
JournalEntry::UnlinkFileV1 { path, .. } => {
let path = path.to_string();
if let Some(existing) = state
.create_trunc_file
.remove(&path)
.or_else(|| state.modify_file.remove(&path))
{
if let Some(remove) = state.suspect_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
if let Some(remove) = state.keep_descriptors.remove(&existing) {
state.descriptors.remove(&remove);
}
}
state.unlink_file.insert(path, event_index);
}
// Renames may update some of the tracking functions
JournalEntry::PathRenameV1 {
old_path, new_path, ..
} => {
let old_path = old_path.to_string();
let new_path = new_path.to_string();

if let Some(existing) = state.create_trunc_file.remove(&old_path) {
if let Some(replaces) = state.create_trunc_file.insert(new_path, existing) {
if let Some(remove) = state.suspect_descriptors.remove(&replaces) {
state.descriptors.remove(&remove);
}
if let Some(remove) = state.keep_descriptors.remove(&replaces) {
state.descriptors.remove(&remove);
}
}
} else if let Some(existing) = state.modify_file.remove(&old_path) {
if let Some(replaces) = state.modify_file.insert(new_path, existing) {
if let Some(remove) = state.suspect_descriptors.remove(&replaces) {
state.descriptors.remove(&remove);
}
if let Some(remove) = state.keep_descriptors.remove(&replaces) {
state.descriptors.remove(&remove);
}
}
} else if let Some(existing) = state.create_directory.remove(&old_path) {
if let Some(replaces) = state.create_directory.insert(new_path, existing) {
state.descriptors.remove(&replaces);
}
} else {
state.whitelist.insert(event_index);
}
}
// Update all the directory operations
JournalEntry::PathSetTimesV1 { path, .. } => {
let path = path.to_string();
let lookup = state.create_directory.get(&path).cloned();
if let Some(lookup) = lookup {
state
.descriptors
.entry(lookup)
.or_default()
.events
.push(event_index);
} else {
state.whitelist.insert(event_index);
}
}
// Pipes that remain open at the end will be added
JournalEntry::CreatePipeV1 { fd1, fd2, .. } => {
Expand Down
2 changes: 2 additions & 0 deletions lib/journal/src/concrete/printing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ impl<'a> fmt::Display for JournalEntry<'a> {
if o_flags.contains(wasi::Oflags::CREATE) {
if o_flags.contains(wasi::Oflags::TRUNC) {
write!(f, "fd-create-new (fd={}, path={})", fd, path)
} else if o_flags.contains(wasi::Oflags::EXCL) {
write!(f, "fd-create-excl (fd={}, path={})", fd, path)
} else {
write!(f, "fd-create (fd={}, path={})", fd, path)
}
Expand Down
5 changes: 4 additions & 1 deletion lib/virtual-fs/src/overlay_fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,10 @@ where

fn poll_copy_start_and_progress(&mut self, cx: &mut Context) -> Poll<io::Result<()>> {
replace_with_or_abort(&mut self.state, |state| match state {
CowState::ReadOnly(inner) => CowState::SeekingGet(inner),
CowState::ReadOnly(inner) => {
tracing::trace!("COW file touched, starting file clone",);
CowState::SeekingGet(inner)
}
state => state,
});
self.poll_copy_progress(cx)
Expand Down
10 changes: 5 additions & 5 deletions lib/vm/src/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ impl VMOwnedMemory {
pub fn new_with_file(
memory: &MemoryType,
style: &MemoryStyle,
backing_file: std::fs::File,
backing_file: std::path::PathBuf,
memory_type: MmapType,
) -> Result<Self, MemoryError> {
unsafe { Self::new_internal(memory, style, None, Some(backing_file), memory_type) }
Expand Down Expand Up @@ -261,7 +261,7 @@ impl VMOwnedMemory {
memory: &MemoryType,
style: &MemoryStyle,
vm_memory_location: NonNull<VMMemoryDefinition>,
backing_file: Option<std::fs::File>,
backing_file: Option<std::path::PathBuf>,
memory_type: MmapType,
) -> Result<Self, MemoryError> {
Self::new_internal(
Expand All @@ -278,7 +278,7 @@ impl VMOwnedMemory {
memory: &MemoryType,
style: &MemoryStyle,
vm_memory_location: Option<NonNull<VMMemoryDefinition>>,
backing_file: Option<std::fs::File>,
backing_file: Option<std::path::PathBuf>,
memory_type: MmapType,
) -> Result<Self, MemoryError> {
if memory.minimum > Pages::max_value() {
Expand Down Expand Up @@ -463,7 +463,7 @@ impl VMSharedMemory {
pub fn new_with_file(
memory: &MemoryType,
style: &MemoryStyle,
backing_file: std::fs::File,
backing_file: std::path::PathBuf,
memory_type: MmapType,
) -> Result<Self, MemoryError> {
Ok(VMOwnedMemory::new_with_file(memory, style, backing_file, memory_type)?.to_shared())
Expand Down Expand Up @@ -497,7 +497,7 @@ impl VMSharedMemory {
memory: &MemoryType,
style: &MemoryStyle,
vm_memory_location: NonNull<VMMemoryDefinition>,
backing_file: Option<std::fs::File>,
backing_file: Option<std::path::PathBuf>,
memory_type: MmapType,
) -> Result<Self, MemoryError> {
Ok(VMOwnedMemory::from_definition_with_file(
Expand Down
Loading
Loading