Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support building object_store and parquet on wasm32-unknown-unknown target #2896

Merged
merged 2 commits into from
Oct 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/setup-builder/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,5 @@ runs:
echo "Installing ${{ inputs.rust-version }}"
rustup toolchain install ${{ inputs.rust-version }}
rustup default ${{ inputs.rust-version }}
rustup target add wasm32-unknown-unknown
echo "CARGO_TARGET_DIR=/github/home/target" >> $GITHUB_ENV
3 changes: 3 additions & 0 deletions .github/workflows/parquet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ jobs:
- name: Check compilation --all-targets --no-default-features --features json
run: |
cargo check -p parquet --all-targets --no-default-features --features json
- name: Check compilation wasm32-unknown-unknown
run: |
cargo check -p parquet --no-default-features --features cli,snap,flate2,brotli --target wasm32-unknown-unknown
clippy:
name: Clippy
Expand Down
6 changes: 4 additions & 2 deletions object_store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ itertools = "0.10.1"
parking_lot = { version = "0.12" }
percent-encoding = "2.1"
snafu = "0.7"
tokio = { version = "1.18", features = ["sync", "macros", "parking_lot", "rt-multi-thread", "time", "io-util"] }
tokio = { version = "1.18", features = ["sync", "macros", "rt", "time", "io-util"] }
tracing = { version = "0.1" }
url = "2.2"
walkdir = "2"
Expand All @@ -51,9 +51,11 @@ rand = { version = "0.8", default-features = false, features = ["std", "std_rng"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"], optional = true }
ring = { version = "0.16", default-features = false, features = ["std"], optional = true }
rustls-pemfile = { version = "1.0", default-features = false, optional = true }
# Fix for wasm32-unknown-unknown (see https://docs.rs/getrandom/latest/getrandom/#webassembly-support)
getrandom = { version = "0.2", features = ["js"], optional = true }

[features]
cloud = ["serde", "serde_json", "quick-xml", "reqwest", "reqwest/json", "reqwest/stream", "chrono/serde", "base64", "rand", "ring"]
cloud = ["serde", "serde_json", "quick-xml", "reqwest", "reqwest/json", "reqwest/stream", "chrono/serde", "base64", "rand", "ring", "getrandom"]
azure = ["cloud"]
gcp = ["cloud", "rustls-pemfile"]
aws = ["cloud"]
Expand Down
9 changes: 8 additions & 1 deletion object_store/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,14 @@ change. Supported object stores include:
* Memory
* Custom implementations


Originally developed for [InfluxDB IOx](https://github.com/influxdata/influxdb_iox/) and later split out and donated to [Apache Arrow](https://arrow.apache.org/).

See [docs.rs](https://docs.rs/object_store) for usage instructions

## Support for `wasm32-unknown-unknown` target

It's possible to build `object_store` for the `wasm32-unknown-unknown` target; however, the cloud storage features `aws`, `azure`, and `gcp` are not supported.

```
cargo build -p object_store --target wasm32-unknown-unknown
```
20 changes: 17 additions & 3 deletions object_store/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,20 @@
//! ```
//!
#[cfg(all(
target_arch = "wasm32",
any(feature = "gcp", feature = "aws", feature = "azure",)
))]
compile_error!("Features 'gcp', 'aws', 'azure' are not supported on wasm.");

#[cfg(feature = "aws")]
pub mod aws;
#[cfg(feature = "azure")]
pub mod azure;
#[cfg(feature = "gcp")]
pub mod gcp;
pub mod limit;
#[cfg(not(target_arch = "wasm32"))]
pub mod local;
pub mod memory;
pub mod path;
Expand All @@ -176,15 +183,16 @@ mod multipart;
mod util;

use crate::path::Path;
use crate::util::{
coalesce_ranges, collect_bytes, maybe_spawn_blocking, OBJECT_STORE_COALESCE_DEFAULT,
};
#[cfg(not(target_arch = "wasm32"))]
use crate::util::maybe_spawn_blocking;
use crate::util::{coalesce_ranges, collect_bytes, OBJECT_STORE_COALESCE_DEFAULT};
use async_trait::async_trait;
use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::{stream::BoxStream, StreamExt};
use snafu::Snafu;
use std::fmt::{Debug, Formatter};
#[cfg(not(target_arch = "wasm32"))]
use std::io::{Read, Seek, SeekFrom};
use std::ops::Range;
use tokio::io::AsyncWrite;
Expand Down Expand Up @@ -351,6 +359,7 @@ impl GetResult {
/// Collects the data into a [`Bytes`]
pub async fn bytes(self) -> Result<Bytes> {
match self {
#[cfg(not(target_arch = "wasm32"))]
Self::File(mut file, path) => {
maybe_spawn_blocking(move || {
let len = file.seek(SeekFrom::End(0)).map_err(|source| {
Expand All @@ -377,6 +386,8 @@ impl GetResult {
.await
}
Self::Stream(s) => collect_bytes(s, None).await,
#[cfg(target_arch = "wasm32")]
_ => unimplemented!("File IO not implemented on wasm32."),
}
}

Expand All @@ -396,6 +407,7 @@ impl GetResult {
/// no additional complexity or overheads
pub fn into_stream(self) -> BoxStream<'static, Result<Bytes>> {
match self {
#[cfg(not(target_arch = "wasm32"))]
Self::File(file, path) => {
const CHUNK_SIZE: usize = 8 * 1024;

Expand Down Expand Up @@ -424,6 +436,8 @@ impl GetResult {
.boxed()
}
Self::Stream(s) => s,
#[cfg(target_arch = "wasm32")]
_ => unimplemented!("File IO not implemented on wasm32."),
}
}
}
Expand Down
6 changes: 6 additions & 0 deletions object_store/src/path/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
//! Path abstraction for Object Storage
use itertools::Itertools;
#[cfg(not(target_arch = "wasm32"))]
use percent_encoding::percent_decode;
use snafu::{ensure, ResultExt, Snafu};
use std::fmt::Formatter;
#[cfg(not(target_arch = "wasm32"))]
use url::Url;

/// The delimiter to separate object namespaces, creating a directory structure.
Expand Down Expand Up @@ -160,6 +162,7 @@ impl Path {
})
}

#[cfg(not(target_arch = "wasm32"))]
/// Convert a filesystem path to a [`Path`] relative to the filesystem root
///
/// This will return an error if the path contains illegal character sequences
Expand All @@ -176,6 +179,7 @@ impl Path {
Self::from_absolute_path(absolute)
}

#[cfg(not(target_arch = "wasm32"))]
/// Convert an absolute filesystem path to a [`Path`] relative to the filesystem root
///
/// This will return an error if the path contains illegal character sequences
Expand All @@ -184,6 +188,7 @@ impl Path {
Self::from_absolute_path_with_base(path, None)
}

#[cfg(not(target_arch = "wasm32"))]
/// Convert a filesystem path to a [`Path`] relative to the provided base
///
/// This will return an error if the path contains illegal character sequences
Expand Down Expand Up @@ -308,6 +313,7 @@ where
}
}

#[cfg(not(target_arch = "wasm32"))]
/// Given an absolute filesystem path convert it to a URL representation without canonicalization
pub(crate) fn absolute_path_to_url(
path: impl AsRef<std::path::Path>,
Expand Down
1 change: 1 addition & 0 deletions object_store/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ where
}
}

#[cfg(not(target_arch = "wasm32"))]
/// Takes a function and spawns it to a tokio blocking pool if available
pub async fn maybe_spawn_blocking<F, T>(f: F) -> Result<T>
where
Expand Down
7 changes: 4 additions & 3 deletions parquet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ edition = "2021"
rust-version = "1.62"

[dependencies]
ahash = "0.8"
ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] }
bytes = { version = "1.1", default-features = false, features = ["std"] }
thrift = { version = "0.16", default-features = false }
snap = { version = "1.0", default-features = false, optional = true }
Expand All @@ -46,9 +46,8 @@ base64 = { version = "0.13", default-features = false, features = ["std"], optio
clap = { version = "4", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage"], optional = true }
serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
seq-macro = { version = "0.3", default-features = false }
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
futures = { version = "0.3", default-features = false, features = ["std"], optional = true }
tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "fs", "rt", "io-util"] }
tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "rt", "io-util"] }
hashbrown = { version = "0.12", default-features = false }

[dev-dependencies]
Expand All @@ -62,6 +61,8 @@ lz4 = { version = "1.23", default-features = false }
zstd = { version = "0.11", default-features = false }
serde_json = { version = "1.0", features = ["std"], default-features = false }
arrow = { path = "../arrow", version = "25.0.0", default-features = false, features = ["ipc", "test_utils", "prettyprint", "json"] }
tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "io-util", "fs"] }
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }

[package.metadata.docs.rs]
all-features = true
Expand Down
8 changes: 8 additions & 0 deletions parquet/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ The `parquet` crate provides the following features which may be enabled in your
- [ ] Predicate pushdown
- [x] Parquet format 4.0.0 support

## Support for `wasm32-unknown-unknown` target

It's possible to build `parquet` for the `wasm32-unknown-unknown` target; however, not all the compression features are currently supported due to issues with the upstream crates. In particular, the `zstd` and `lz4` features may have compilation issues. See issue [#180](https://github.com/apache/arrow-rs/issues/180).

```
cargo build -p parquet --target wasm32-unknown-unknown --no-default-features --features cli,snap,flate2,brotli
```

## License

Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0.