From c5c2437e0816c1577be6d7603fd71105c7e82f97 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 13 Dec 2023 19:04:57 +0100 Subject: [PATCH 1/4] implement folder loading support --- .../src/data_loader/loader_directory.rs | 75 +++++++++++++++++++ crates/re_data_source/src/data_loader/mod.rs | 3 + crates/re_data_source/src/load_file.rs | 8 +- 3 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 crates/re_data_source/src/data_loader/loader_directory.rs diff --git a/crates/re_data_source/src/data_loader/loader_directory.rs b/crates/re_data_source/src/data_loader/loader_directory.rs new file mode 100644 index 000000000000..c029aa8d7038 --- /dev/null +++ b/crates/re_data_source/src/data_loader/loader_directory.rs @@ -0,0 +1,75 @@ +// TODO: issue -> HIVE partitiong, timestamp regexes, zip files, that kinda thing + +/// Loads entire directories, using the appropriate [`crate::DataLoader`]:s for each files within. +pub struct DirectoryLoader; + +impl crate::DataLoader for DirectoryLoader { + #[inline] + fn name(&self) -> String { + "rerun.data_loaders.Directory".into() + } + + #[cfg(not(target_arch = "wasm32"))] + fn load_from_file( + &self, + store_id: re_log_types::StoreId, + dirpath: std::path::PathBuf, + tx: std::sync::mpsc::Sender, + ) -> Result<(), crate::DataLoaderError> { + if dirpath.is_file() { + return Ok(()); // simply not interested + } + + re_tracing::profile_function!(dirpath.display().to_string()); + + re_log::debug!(?dirpath, loader = self.name(), "Loading directory…",); + + for entry in walkdir::WalkDir::new(&dirpath) { + let entry = match entry { + Ok(entry) => entry, + Err(err) => { + re_log::error!(loader = self.name(), ?dirpath, %err, "Failed to open filesystem entry"); + continue; + } + }; + + let filepath = entry.path(); + if filepath.is_file() { + let store_id = store_id.clone(); + let filepath = filepath.to_owned(); + let tx = tx.clone(); + + // NOTE: spawn is fine, this whole function is native-only. 
+ rayon::spawn(move || { + let data = match crate::load_file::load(&store_id, &filepath, false, None) { + Ok(data) => data, + Err(err) => { + re_log::error!(?filepath, %err, "Failed to load directory entry"); + return; + } + }; + + for datum in data { + if tx.send(datum).is_err() { + break; + } + } + }); + } + } + + Ok(()) + } + + #[inline] + fn load_from_file_contents( + &self, + _store_id: re_log_types::StoreId, + _path: std::path::PathBuf, + _contents: std::borrow::Cow<'_, [u8]>, + _tx: std::sync::mpsc::Sender, + ) -> Result<(), crate::DataLoaderError> { + // TODO: zip file supports + Ok(()) // simply not interested + } +} diff --git a/crates/re_data_source/src/data_loader/mod.rs b/crates/re_data_source/src/data_loader/mod.rs index 530d3350a97a..84eee2cc2676 100644 --- a/crates/re_data_source/src/data_loader/mod.rs +++ b/crates/re_data_source/src/data_loader/mod.rs @@ -205,6 +205,7 @@ static BUILTIN_LOADERS: Lazy>> = Lazy::new(|| { vec![ Arc::new(RrdLoader) as Arc, Arc::new(ArchetypeLoader), + Arc::new(DirectoryLoader), ] }); @@ -217,7 +218,9 @@ pub fn iter_loaders() -> impl ExactSizeIterator> { // --- mod loader_archetype; +mod loader_directory; mod loader_rrd; pub use self::loader_archetype::ArchetypeLoader; +pub use self::loader_directory::DirectoryLoader; pub use self::loader_rrd::RrdLoader; diff --git a/crates/re_data_source/src/load_file.rs b/crates/re_data_source/src/load_file.rs index beb0ab4bd635..1603e6b7d0b4 100644 --- a/crates/re_data_source/src/load_file.rs +++ b/crates/re_data_source/src/load_file.rs @@ -98,11 +98,11 @@ pub fn extension(path: &std::path::Path) -> String { /// This does _not_ access the filesystem. #[inline] pub fn is_associated_with_builtin_loader(path: &std::path::Path, is_dir: bool) -> bool { - !is_dir && crate::is_supported_file_extension(&extension(path)) + is_dir || crate::is_supported_file_extension(&extension(path)) } /// Prepares an adequate [`re_log_types::StoreInfo`] [`LogMsg`] given the input. 
-fn prepare_store_info( +pub(crate) fn prepare_store_info( store_id: &re_log_types::StoreId, file_source: FileSource, path: &std::path::Path, @@ -139,7 +139,7 @@ fn prepare_store_info( /// - On native, this is filled asynchronously from other threads. /// - On wasm, this is pre-filled synchronously. #[cfg_attr(target_arch = "wasm32", allow(clippy::needless_pass_by_value))] -fn load( +pub(crate) fn load( store_id: &re_log_types::StoreId, path: &std::path::Path, is_dir: bool, @@ -218,7 +218,7 @@ fn load( /// Forwards the data in `rx_loader` to `tx`, taking care of necessary conversions, if any. /// /// Runs asynchronously from another thread on native, synchronously on wasm. -fn send( +pub(crate) fn send( store_id: &re_log_types::StoreId, rx_loader: std::sync::mpsc::Receiver, tx: &Sender, From 9e3d7e22f33012eaf5c6c8736a095a127f4fbfcc Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Thu, 14 Dec 2023 13:12:15 +0100 Subject: [PATCH 2/4] post-rebase shenaniganeries --- crates/re_data_source/src/data_loader/loader_directory.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/re_data_source/src/data_loader/loader_directory.rs b/crates/re_data_source/src/data_loader/loader_directory.rs index c029aa8d7038..d99894500c73 100644 --- a/crates/re_data_source/src/data_loader/loader_directory.rs +++ b/crates/re_data_source/src/data_loader/loader_directory.rs @@ -10,7 +10,7 @@ impl crate::DataLoader for DirectoryLoader { } #[cfg(not(target_arch = "wasm32"))] - fn load_from_file( + fn load_from_path( &self, store_id: re_log_types::StoreId, dirpath: std::path::PathBuf, @@ -62,7 +62,7 @@ impl crate::DataLoader for DirectoryLoader { } #[inline] - fn load_from_file_contents( + fn load_from_path_contents( &self, _store_id: re_log_types::StoreId, _path: std::path::PathBuf, From 18a7f241488676bf13533275bfda96180f2b3242 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Thu, 14 Dec 2023 13:17:14 +0100 Subject: [PATCH 3/4] todos --- 
.../re_data_source/src/data_loader/loader_directory.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/re_data_source/src/data_loader/loader_directory.rs b/crates/re_data_source/src/data_loader/loader_directory.rs index d99894500c73..68a3746200d2 100644 --- a/crates/re_data_source/src/data_loader/loader_directory.rs +++ b/crates/re_data_source/src/data_loader/loader_directory.rs @@ -1,6 +1,9 @@ -// TODO: issue -> HIVE partitiong, timestamp regexes, zip files, that kinda thing - /// Loads entire directories, using the appropriate [`crate::DataLoader`]:s for each files within. +// +// TODO(cmc): There are a lot more things than can be done be done when it comes to the semantics +// of a folder, e.g.: HIVE-like partitioning, similarly named files with different indices and/or +// timestamps (e.g. a folder of video frames), etc. +// We could support some of those at some point, or at least add examples to show users how. pub struct DirectoryLoader; impl crate::DataLoader for DirectoryLoader { @@ -69,7 +72,7 @@ impl crate::DataLoader for DirectoryLoader { _contents: std::borrow::Cow<'_, [u8]>, _tx: std::sync::mpsc::Sender, ) -> Result<(), crate::DataLoaderError> { - // TODO: zip file supports + // TODO(cmc): This could make sense to implement for e.g. 
archive formats (zip, tar, …) Ok(()) // simply not interested } } From 322431314d92c92cdbf98a3c66351ce3b33a6b81 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 15 Dec 2023 15:08:29 +0100 Subject: [PATCH 4/4] its recursive btw --- crates/re_data_source/src/data_loader/loader_directory.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/re_data_source/src/data_loader/loader_directory.rs b/crates/re_data_source/src/data_loader/loader_directory.rs index 68a3746200d2..bcca69919278 100644 --- a/crates/re_data_source/src/data_loader/loader_directory.rs +++ b/crates/re_data_source/src/data_loader/loader_directory.rs @@ -1,4 +1,5 @@ -/// Loads entire directories, using the appropriate [`crate::DataLoader`]:s for each files within. +/// Recursively loads entire directories, using the appropriate [`crate::DataLoader`]:s for each +/// file within. // // TODO(cmc): There are a lot more things than can be done be done when it comes to the semantics // of a folder, e.g.: HIVE-like partitioning, similarly named files with different indices and/or @@ -65,7 +66,7 @@ impl crate::DataLoader for DirectoryLoader { } #[inline] - fn load_from_path_contents( + fn load_from_file_contents( &self, _store_id: re_log_types::StoreId, _path: std::path::PathBuf,