Skip to content

Commit

Permalink
Address MetadataLoader clippy errors, correct docs + add a suffix example
Browse files Browse the repository at this point in the history
  • Loading branch information
H-Plus-Time committed Aug 1, 2024
1 parent 76a4633 commit 059b4e4
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 8 deletions.
2 changes: 1 addition & 1 deletion parquet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ serde_json = { version = "1.0", features = ["std"], default-features = false }
arrow = { workspace = true, features = ["ipc", "test_utils", "prettyprint", "json"] }
tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "io-util", "fs"] }
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
object_store = { version = "0.10.0", default-features = false, features = ["azure"] }
object_store = { version = "0.10.0", default-features = false, features = ["azure", "aws"] }

# TODO: temporary to fix parquet wasm build
# upstream issue: https://github.com/gyscos/zstd-rs/issues/269
Expand Down
6 changes: 2 additions & 4 deletions parquet/src/arrow/async_reader/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ impl<F: MetadataFetch> MetadataLoader<F> {
let metadata_offset = length + 8;
let meta = fetch.fetch(GetRange::Suffix(metadata_offset)).await?;
let slice = &meta[0..length];
(decode_metadata(&slice)?, None)
(decode_metadata(slice)?, None)
} else {
let metadata_offset = length + 8;
let metadata_start = suffix_len - metadata_offset;
Expand Down Expand Up @@ -293,9 +293,7 @@ mod tests {
let range = match range {
GetRange::Bounded(range) => range,
GetRange::Offset(offset) => offset..file_size,
GetRange::Suffix(end_offset) => {
file_size.saturating_sub(end_offset.try_into().unwrap())..file_size
}
GetRange::Suffix(end_offset) => file_size.saturating_sub(end_offset)..file_size,
};
file.seek(SeekFrom::Start(range.start as _))?;
let len = range.end - range.start;
Expand Down
19 changes: 16 additions & 3 deletions parquet/src/arrow/async_reader/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,22 +47,35 @@ impl From<GetRange> for object_store::GetRange {
/// # use std::io::stdout;
/// # use std::sync::Arc;
/// # use object_store::azure::MicrosoftAzureBuilder;
/// # use object_store::aws::AmazonS3Builder;
/// # use object_store::ObjectStore;
/// # use object_store::path::Path;
/// # use parquet::arrow::async_reader::ParquetObjectReader;
/// # use parquet::arrow::ParquetRecordBatchStreamBuilder;
/// # use parquet::schema::printer::print_parquet_metadata;
/// # async fn run() {
/// // Object Stores that support suffix ranges:
/// // Populate configuration from environment
/// let storage_container = Arc::new(AmazonS3Builder::from_env().build().unwrap());
/// let location = Path::from("path/to/blob.parquet");
///
/// // Show Parquet metadata
/// let reader = ParquetObjectReader::new(storage_container, location);
/// let builder = ParquetRecordBatchStreamBuilder::new(reader).await.unwrap();
/// print_parquet_metadata(&mut stdout(), builder.metadata());
/// # }
/// # async fn run_non_suffixed() {
/// // Populate configuration from environment
/// let storage_container = Arc::new(MicrosoftAzureBuilder::from_env().build().unwrap());
/// let location = Path::from("path/to/blob.parquet");
/// let meta = storage_container.head(&location).await.unwrap();
/// println!("Found Blob with {}B at {}", meta.size, meta.location);
///
/// // Show Parquet metadata
/// let reader = ParquetObjectReader::new(storage_container, meta);
/// let reader = ParquetObjectReader::new(storage_container, location).with_file_size(meta.size);
/// let builder = ParquetRecordBatchStreamBuilder::new(reader).await.unwrap();
/// print_parquet_metadata(&mut stdout(), builder.metadata());
///
/// # }
/// ```
#[derive(Clone, Debug)]
Expand All @@ -76,9 +89,9 @@ pub struct ParquetObjectReader {
}

impl ParquetObjectReader {
/// Creates a new [`ParquetObjectReader`] for the provided [`ObjectStore`] and [`ObjectMeta`]
/// Creates a new [`ParquetObjectReader`] for the provided [`ObjectStore`] and [`Path`]
///
/// [`ObjectMeta`] can be obtained using [`ObjectStore::list`] or [`ObjectStore::head`]
/// [file_size] can be obtained using [`ObjectStore::list`] or [`ObjectStore::head`]
pub fn new(store: Arc<dyn ObjectStore>, location: Path) -> Self {
Self {
store,
Expand Down

0 comments on commit 059b4e4

Please sign in to comment.