Skip to content

Commit

Permalink
DataLoaders 2: add text-based DataLoader (.txt, .md) (#4518)
Browse files Browse the repository at this point in the history
What the title says.


![image](https://github.com/rerun-io/rerun/assets/2910679/68f2e499-f4df-4e75-95f6-0ed7f479c5e6)


Checks:
- [x] `cargo r -p rerun-cli --no-default-features --features
native_viewer --
examples/assets/example.{glb,gltf,obj,jpg,png,rrd,txt,md}`
- [x] Native: `File > Open > examples/assets/*`
- [x] Native: `Drag-n-drop > examples/assets/*`
- [x] Web: `File > Open > examples/assets/*`
- [x] Web: `Drag-n-drop > examples/assets/*`

---

Part of a series of PRs to make it possible to load _any_ file from the
local filesystem, by any means, on web and native:
- #4516
- #4517 
- #4518 
- #4519 
- #4520 
- #4521 
- TODO: register custom loaders
- TODO: high level docs and guides for everything related to loading
files
  • Loading branch information
teh-cmc authored Dec 15, 2023
1 parent b51df6a commit 8f4f4e9
Show file tree
Hide file tree
Showing 12 changed files with 115 additions and 13 deletions.
1 change: 1 addition & 0 deletions .typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ extend-exclude = [
".typos.toml",
"crates/re_ui/data/design_tokens.json",
"crates/re_ui/src/design_tokens.rs",
"examples/assets",
]


Expand Down
32 changes: 31 additions & 1 deletion crates/re_data_source/src/data_loader/loader_archetype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,15 @@ impl DataLoader for ArchetypeLoader {
entity_path,
contents.into_owned(),
)?);
};
} else if crate::SUPPORTED_TEXT_EXTENSIONS.contains(&extension.as_str()) {
re_log::debug!(?filepath, loader = self.name(), "Loading text document…",);
rows.extend(load_text_document(
filepath,
timepoint,
entity_path,
contents.into_owned(),
)?);
}

for row in rows {
if tx.send(row.into()).is_err() {
Expand Down Expand Up @@ -153,3 +161,25 @@ fn load_image(

Ok(rows.into_iter())
}

/// Turns the raw `contents` of a text file into the rows to be sent for `entity_path`.
///
/// The media type is guessed from `filepath`, the bytes are converted into a
/// `TextDocument` archetype, and that archetype is wrapped into a single [`DataRow`]
/// stamped with `timepoint`.
///
/// Any failure from building the archetype or the row is propagated to the caller.
fn load_text_document(
    filepath: std::path::PathBuf,
    timepoint: TimePoint,
    entity_path: EntityPath,
    contents: Vec<u8>,
) -> Result<impl ExactSizeIterator<Item = DataRow>, DataLoaderError> {
    re_tracing::profile_function!();

    let media_type = re_types::components::MediaType::guess_from_path(filepath);
    let document = re_types::archetypes::TextDocument::from_file_contents(contents, media_type)?;
    let row = DataRow::from_archetype(RowId::new(), timepoint, entity_path, &document)?;

    // A text file always maps to exactly one row.
    Ok([row].into_iter())
}
2 changes: 2 additions & 0 deletions crates/re_data_source/src/data_loader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use re_log_types::{ArrowMsg, DataRow, LogMsg};
/// - [`ArchetypeLoader`] for:
/// - [3D models]
/// - [Images]
/// - [Text files]
///
/// ## Execution
///
Expand All @@ -38,6 +39,7 @@ use re_log_types::{ArrowMsg, DataRow, LogMsg};
/// [Rerun extensions]: crate::SUPPORTED_RERUN_EXTENSIONS
/// [3D models]: crate::SUPPORTED_MESH_EXTENSIONS
/// [Images]: crate::SUPPORTED_IMAGE_EXTENSIONS
/// [Text files]: crate::SUPPORTED_TEXT_EXTENSIONS
//
// TODO(#4525): `DataLoader`s should support arbitrary URIs
// TODO(#4526): `DataLoader`s should be exposed to the SDKs
Expand Down
11 changes: 6 additions & 5 deletions crates/re_data_source/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@
//! - Over WebSockets
//! - From disk
//!
//! Also handles different file types:
//!
//! - .rrd
//! - images
//! - meshes
//! Also handles different file types: rrd, images, text files, 3D models, point clouds…
mod data_loader;
mod data_source;
Expand Down Expand Up @@ -51,12 +47,16 @@ pub const SUPPORTED_MESH_EXTENSIONS: &[&str] = &["glb", "gltf", "obj"];

pub const SUPPORTED_RERUN_EXTENSIONS: &[&str] = &["rrd"];

// TODO(#4555): Add catch-all builtin `DataLoader` for text files
/// File extensions (without the leading dot) that are treated as text documents.
pub const SUPPORTED_TEXT_EXTENSIONS: &[&str] = &["txt", "md"];

/// Every file extension supported by our builtin [`DataLoader`]s.
///
/// Extensions are yielded group by group: Rerun files first, then images, meshes and
/// finally text files.
pub fn supported_extensions() -> impl Iterator<Item = &'static str> {
    [
        SUPPORTED_RERUN_EXTENSIONS,
        SUPPORTED_IMAGE_EXTENSIONS,
        SUPPORTED_MESH_EXTENSIONS,
        SUPPORTED_TEXT_EXTENSIONS,
    ]
    .into_iter()
    .flatten()
    .copied()
}

Expand All @@ -65,4 +65,5 @@ pub fn is_supported_file_extension(extension: &str) -> bool {
SUPPORTED_IMAGE_EXTENSIONS.contains(&extension)
|| SUPPORTED_MESH_EXTENSIONS.contains(&extension)
|| SUPPORTED_RERUN_EXTENSIONS.contains(&extension)
|| SUPPORTED_TEXT_EXTENSIONS.contains(&extension)
}
2 changes: 1 addition & 1 deletion crates/re_types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ features = ["all"]
default = []

## All features except `testing`.
all = ["ecolor", "glam", "serde"]
all = ["ecolor", "glam", "image", "serde"]

## Enables the `datagen` module, which exposes a number of tools for generating random data for
## tests and benchmarks.
Expand Down
6 changes: 3 additions & 3 deletions crates/re_types/src/archetypes/image_ext.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use image::ImageFormat;

use crate::{
datatypes::TensorData,
image::{find_non_empty_dim_indices, ImageConstructionError},
Expand Down Expand Up @@ -49,6 +47,7 @@ impl Image {
/// Creates a new [`Image`] from a file.
///
/// The image format will be inferred from the path (extension), or the contents if that fails.
#[cfg(feature = "image")]
#[cfg(not(target_arch = "wasm32"))]
#[inline]
pub fn from_file_path(filepath: impl AsRef<std::path::Path>) -> anyhow::Result<Self> {
Expand All @@ -61,10 +60,11 @@ impl Image {
/// Creates a new [`Image`] from the contents of a file.
///
/// If unspecified, the image format will be inferred from the contents.
#[cfg(feature = "image")]
#[inline]
pub fn from_file_contents(
contents: Vec<u8>,
format: Option<ImageFormat>,
format: Option<image::ImageFormat>,
) -> anyhow::Result<Self> {
let format = if let Some(format) = format {
format
Expand Down
1 change: 1 addition & 0 deletions crates/re_types/src/archetypes/mod.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions crates/re_types/src/archetypes/text_document_ext.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use crate::components::MediaType;

use super::TextDocument;

impl TextDocument {
    /// Reads the UTF-8 file at `filepath` and turns it into a [`TextDocument`].
    ///
    /// The media type is guessed from the path first; [`Self::from_file_contents`] then
    /// gets a chance to guess it from the file contents if the path was not conclusive.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be read, or if its contents cannot be turned into a
    /// [`TextDocument`]. Both errors mention the offending path.
    #[cfg(not(target_arch = "wasm32"))]
    pub fn from_file_path(filepath: impl AsRef<std::path::Path>) -> anyhow::Result<Self> {
        use anyhow::Context as _;

        let filepath = filepath.as_ref();

        let contents = std::fs::read(filepath)
            .with_context(|| format!("could not read file contents: {filepath:?}"))?;
        let media_type = MediaType::guess_from_path(filepath);

        Self::from_file_contents(contents, media_type)
            .with_context(|| format!("could not parse file contents: {filepath:?}"))
    }

    /// Builds a [`TextDocument`] out of the raw bytes of a UTF-8 file.
    ///
    /// When `media_type` is `None`, [`MediaType::or_guess_from_data`] is asked to infer
    /// one from `contents`.
    ///
    /// # Errors
    ///
    /// Fails if `contents` is not valid UTF-8.
    #[inline]
    pub fn from_file_contents(
        contents: Vec<u8>,
        media_type: Option<impl Into<MediaType>>,
    ) -> anyhow::Result<Self> {
        // The guess needs to look at the bytes, so it runs before `contents` is consumed.
        let media_type = MediaType::or_guess_from_data(media_type.map(Into::into), &contents);
        let text = String::from_utf8(contents)?;

        Ok(Self {
            text: text.into(),
            media_type,
        })
    }
}
3 changes: 2 additions & 1 deletion docs/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"version": "0.2",
"usePnP": true,
"ignorePaths": [
"node_modules/**"
"node_modules/**",
"../examples/assets/**"
],
"ignoreWords": [
"-useb",
Expand Down
21 changes: 21 additions & 0 deletions examples/assets/example.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Huge

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque eleifend mi eget tellus pellentesque, sit amet ultricies tortor iaculis. Pellentesque viverra ipsum ut nisl rutrum maximus. Nunc vehicula lectus quis lacinia condimentum. Suspendisse lobortis mattis nisl, id egestas dui condimentum vitae. Integer lacus quam, commodo eget pretium at, scelerisque ac diam. Donec consectetur mauris felis. Proin pharetra, velit in venenatis facilisis, nunc diam blandit justo, sit amet laoreet ex nunc ut mi. Morbi ut tincidunt leo, eu maximus urna.

## Big

Aenean odio enim, elementum non nibh at, lacinia pretium elit. Quisque sit amet porta metus. Nunc in arcu turpis. Sed non tristique tellus, eget ultrices arcu. In rhoncus nibh in dolor pellentesque, id suscipit sem volutpat. Suspendisse condimentum tempor ante, sed rutrum eros viverra vitae. Sed vitae vestibulum eros, eu auctor velit. Praesent a efficitur elit. Nulla finibus porttitor tortor nec semper.

Nulla fermentum est ac convallis bibendum. Ut cursus, libero at sollicitudin laoreet, nunc ante aliquam dolor, quis malesuada mi ligula a sapien. Pellentesque sollicitudin odio a tempor tempus. Ut finibus nulla eget placerat hendrerit. Aenean eu arcu metus. Aliquam erat volutpat. Sed in ullamcorper mauris. Ut sollicitudin nisi fermentum, molestie justo eu, malesuada magna. Proin semper nisi sit amet pulvinar lacinia. Etiam purus magna, accumsan facilisis tellus eu, tincidunt dignissim dui.

### Less big

In massa arcu, finibus congue vulputate quis, pulvinar ac est. Morbi felis nibh, cursus ut mi id, rutrum rutrum est. Cras interdum enim non ipsum ornare commodo. Ut blandit, dui quis efficitur eleifend, urna nisl cursus metus, at placerat tortor orci et sem. Morbi sodales felis sed mattis tempus. Vivamus scelerisque dignissim mi. Etiam elementum mattis turpis, id porttitor arcu. Maecenas dui ipsum, scelerisque non molestie eu, hendrerit in justo. In hac habitasse platea dictumst. Curabitur faucibus hendrerit turpis quis gravida.

Etiam velit mauris, varius in aliquam eu, malesuada eu massa. Nulla eu arcu in velit bibendum volutpat. Nulla sollicitudin lectus nisi, ac efficitur nibh consectetur vitae. Fusce a placerat turpis. Nullam tincidunt sed nulla sed vulputate. In id pharetra libero, congue aliquet justo. In laoreet, odio a interdum fermentum, leo orci efficitur turpis, at tempus diam sem quis diam. Vestibulum ultricies urna eget mi dignissim convallis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nulla ut gravida ante, eget sodales metus. Integer pellentesque tempus magna, a scelerisque mi suscipit sed. Aenean sed malesuada ex.

#### Tiny

```rust
"with some code!"
```
9 changes: 9 additions & 0 deletions examples/assets/example.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque eleifend mi eget tellus pellentesque, sit amet ultricies tortor iaculis. Pellentesque viverra ipsum ut nisl rutrum maximus. Nunc vehicula lectus quis lacinia condimentum. Suspendisse lobortis mattis nisl, id egestas dui condimentum vitae. Integer lacus quam, commodo eget pretium at, scelerisque ac diam. Donec consectetur mauris felis. Proin pharetra, velit in venenatis facilisis, nunc diam blandit justo, sit amet laoreet ex nunc ut mi. Morbi ut tincidunt leo, eu maximus urna.

Aenean odio enim, elementum non nibh at, lacinia pretium elit. Quisque sit amet porta metus. Nunc in arcu turpis. Sed non tristique tellus, eget ultrices arcu. In rhoncus nibh in dolor pellentesque, id suscipit sem volutpat. Suspendisse condimentum tempor ante, sed rutrum eros viverra vitae. Sed vitae vestibulum eros, eu auctor velit. Praesent a efficitur elit. Nulla finibus porttitor tortor nec semper.

Nulla fermentum est ac convallis bibendum. Ut cursus, libero at sollicitudin laoreet, nunc ante aliquam dolor, quis malesuada mi ligula a sapien. Pellentesque sollicitudin odio a tempor tempus. Ut finibus nulla eget placerat hendrerit. Aenean eu arcu metus. Aliquam erat volutpat. Sed in ullamcorper mauris. Ut sollicitudin nisi fermentum, molestie justo eu, malesuada magna. Proin semper nisi sit amet pulvinar lacinia. Etiam purus magna, accumsan facilisis tellus eu, tincidunt dignissim dui.

In massa arcu, finibus congue vulputate quis, pulvinar ac est. Morbi felis nibh, cursus ut mi id, rutrum rutrum est. Cras interdum enim non ipsum ornare commodo. Ut blandit, dui quis efficitur eleifend, urna nisl cursus metus, at placerat tortor orci et sem. Morbi sodales felis sed mattis tempus. Vivamus scelerisque dignissim mi. Etiam elementum mattis turpis, id porttitor arcu. Maecenas dui ipsum, scelerisque non molestie eu, hendrerit in justo. In hac habitasse platea dictumst. Curabitur faucibus hendrerit turpis quis gravida.

Etiam velit mauris, varius in aliquam eu, malesuada eu massa. Nulla eu arcu in velit bibendum volutpat. Nulla sollicitudin lectus nisi, ac efficitur nibh consectetur vitae. Fusce a placerat turpis. Nullam tincidunt sed nulla sed vulputate. In id pharetra libero, congue aliquet justo. In laoreet, odio a interdum fermentum, leo orci efficitur turpis, at tempus diam sem quis diam. Vestibulum ultricies urna eget mi dignissim convallis. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nulla ut gravida ante, eget sodales metus. Integer pellentesque tempus magna, a scelerisque mi suscipit sed. Aenean sed malesuada ex.
5 changes: 3 additions & 2 deletions scripts/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,16 +834,17 @@ def main() -> None:
]

exclude_paths = (
"./rerun_cpp/docs/html",
"./.github/workflows/reusable_checks.yml", # zombie TODO hunting job
"./CODE_STYLE.md",
"./crates/re_types_builder/src/reflection.rs", # auto-generated
"./examples/assets",
"./examples/python/detect_and_track_objects/cache/version.txt",
"./examples/rust/objectron/src/objectron.rs", # auto-generated
"./rerun_cpp/docs/doxygen-awesome/", # copied from an external repository
"./rerun_cpp/docs/html",
"./scripts/lint.py", # we contain all the patterns we are linting against
"./scripts/zombie_todos.py",
"./web_viewer/re_viewer.js", # auto-generated by wasm_bindgen
"./rerun_cpp/docs/doxygen-awesome/", # copied from an external repository
)

should_ignore = parse_gitignore(".gitignore") # TODO(emilk): parse all .gitignore files, not just top-level
Expand Down

0 comments on commit 8f4f4e9

Please sign in to comment.