-
Notifications
You must be signed in to change notification settings - Fork 373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce a simpler cache dedicated to just decode JPEGs #1550
Changes from 10 commits
233555b
b6c0762
78ca256
c524008
4f9b88b
66640ab
f834747
983b703
60ba461
2d37a65
8200569
742201b
cb15939
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
//! Assorted shims that should make their way back to [arrow2-convert](https://github.com/DataEngineeringLabs/arrow2-convert/) | ||
|
||
use std::ops::Index; | ||
|
||
use arrow2::{ | ||
array::{Array, BinaryArray}, | ||
buffer::Buffer, | ||
}; | ||
use arrow2_convert::{ | ||
deserialize::{ArrowArray, ArrowDeserialize}, | ||
ArrowField, ArrowSerialize, | ||
}; | ||
|
||
/// Shim to enable zero-copy arrow deserialization for `Buffer<u8>` | ||
/// Can be removed when: [arrow2-convert#103](https://github.com/DataEngineeringLabs/arrow2-convert/pull/103) lands | ||
#[derive(Clone, Debug, PartialEq, ArrowField, ArrowSerialize)] | ||
#[arrow_field(transparent)] | ||
pub struct BinaryBuffer(pub Buffer<u8>); | ||
|
||
impl BinaryBuffer { | ||
pub fn as_slice(&self) -> &[u8] { | ||
self.0.as_slice() | ||
} | ||
} | ||
|
||
impl Index<usize> for BinaryBuffer { | ||
type Output = u8; | ||
fn index(&self, i: usize) -> &u8 { | ||
&self.0[i] | ||
} | ||
} | ||
|
||
impl From<Vec<u8>> for BinaryBuffer { | ||
fn from(v: Vec<u8>) -> Self { | ||
Self(v.into()) | ||
} | ||
} | ||
|
||
/// Iterator for for [`BufferBinaryArray`] | ||
pub struct BufferBinaryArrayIter<'a> { | ||
index: usize, | ||
array: &'a BinaryArray<i32>, | ||
} | ||
|
||
impl<'a> Iterator for BufferBinaryArrayIter<'a> { | ||
type Item = Option<Buffer<u8>>; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
if self.index >= self.array.len() { | ||
None | ||
} else { | ||
if let Some(validity) = self.array.validity() { | ||
if !validity.get_bit(self.index) { | ||
self.index += 1; | ||
return Some(None); | ||
} | ||
} | ||
let (start, end) = self.array.offsets().start_end(self.index); | ||
self.index += 1; | ||
Some(Some(self.array.values().clone().slice(start, end - start))) | ||
} | ||
} | ||
} | ||
|
||
/// Internal `ArrowArray` helper to iterate over a `BinaryArray` while exposing Buffer slices | ||
pub struct BufferBinaryArray; | ||
|
||
extern "C" { | ||
fn do_not_call_into_iter(); // we never define this function, so the linker will fail | ||
} | ||
|
||
impl<'a> IntoIterator for &'a BufferBinaryArray { | ||
type Item = Option<Buffer<u8>>; | ||
|
||
type IntoIter = BufferBinaryArrayIter<'a>; | ||
|
||
fn into_iter(self) -> Self::IntoIter { | ||
#[allow(unsafe_code)] | ||
// SAFETY: | ||
// This exists so we get a link-error if some code tries to call into_iter | ||
// Iteration should only happen via iter_from_array_ref. | ||
// This is a quirk of the way the traits work in arrow2_convert. | ||
unsafe { | ||
do_not_call_into_iter(); | ||
} | ||
unreachable!() | ||
} | ||
} | ||
|
||
impl ArrowArray for BufferBinaryArray { | ||
type BaseArrayType = BinaryArray<i32>; | ||
#[inline] | ||
fn iter_from_array_ref(a: &dyn Array) -> <&Self as IntoIterator>::IntoIter { | ||
let b = a.as_any().downcast_ref::<Self::BaseArrayType>().unwrap(); | ||
|
||
BufferBinaryArrayIter { index: 0, array: b } | ||
} | ||
} | ||
|
||
impl ArrowDeserialize for BinaryBuffer { | ||
type ArrayType = BufferBinaryArray; | ||
|
||
#[inline] | ||
fn arrow_deserialize(v: Option<Buffer<u8>>) -> Option<Self> { | ||
v.map(BinaryBuffer) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,8 @@ use arrow2_convert::{serialize::ArrowSerialize, ArrowDeserialize, ArrowField, Ar | |
use crate::msg_bundle::Component; | ||
use crate::{TensorDataType, TensorElement}; | ||
|
||
use super::arrow_convert_shims::BinaryBuffer; | ||
|
||
pub trait TensorTrait { | ||
fn id(&self) -> TensorId; | ||
fn shape(&self) -> &[TensorDimension]; | ||
|
@@ -16,6 +18,7 @@ pub trait TensorTrait { | |
fn meaning(&self) -> TensorDataMeaning; | ||
fn get(&self, index: &[u64]) -> Option<TensorElement>; | ||
fn dtype(&self) -> TensorDataType; | ||
fn size_in_bytes(&self) -> usize; | ||
} | ||
|
||
// ---------------------------------------------------------------------------- | ||
|
@@ -154,7 +157,7 @@ impl ArrowDeserialize for TensorId { | |
#[derive(Clone, Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)] | ||
#[arrow_field(type = "dense")] | ||
pub enum TensorData { | ||
U8(Vec<u8>), | ||
U8(BinaryBuffer), | ||
U16(Buffer<u16>), | ||
U32(Buffer<u32>), | ||
U64(Buffer<u64>), | ||
|
@@ -168,7 +171,7 @@ pub enum TensorData { | |
//F16(Vec<arrow2::types::f16>), | ||
F32(Buffer<f32>), | ||
F64(Buffer<f64>), | ||
JPEG(Vec<u8>), | ||
JPEG(BinaryBuffer), | ||
} | ||
|
||
/// Flattened `Tensor` data payload | ||
|
@@ -404,6 +407,21 @@ impl TensorTrait for Tensor { | |
TensorData::F64(_) => TensorDataType::F64, | ||
} | ||
} | ||
|
||
fn size_in_bytes(&self) -> usize { | ||
match &self.data { | ||
TensorData::U8(buf) | TensorData::JPEG(buf) => buf.0.len(), | ||
TensorData::U16(buf) => buf.len(), | ||
TensorData::U32(buf) => buf.len(), | ||
TensorData::U64(buf) => buf.len(), | ||
TensorData::I8(buf) => buf.len(), | ||
TensorData::I16(buf) => buf.len(), | ||
TensorData::I32(buf) => buf.len(), | ||
TensorData::I64(buf) => buf.len(), | ||
TensorData::F32(buf) => buf.len(), | ||
TensorData::F64(buf) => buf.len(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh wow, it is quite surprising to me that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I filed an issue for this: jorgecarleitao/arrow2#1430 |
||
} | ||
} | ||
} | ||
|
||
impl Component for Tensor { | ||
|
@@ -535,7 +553,7 @@ impl<'a> TryFrom<&'a Tensor> for ::ndarray::ArrayViewD<'a, half::f16> { | |
|
||
#[cfg(feature = "image")] | ||
#[derive(thiserror::Error, Debug)] | ||
pub enum ImageError { | ||
pub enum TensorImageError { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Differentiating this from the image::ImageError had me scratching my head for a bit. |
||
#[error(transparent)] | ||
Image(#[from] image::ImageError), | ||
|
||
|
@@ -575,20 +593,22 @@ impl Tensor { | |
#[cfg(not(target_arch = "wasm32"))] | ||
pub fn tensor_from_jpeg_file( | ||
image_path: impl AsRef<std::path::Path>, | ||
) -> Result<Self, ImageError> { | ||
) -> Result<Self, TensorImageError> { | ||
let jpeg_bytes = std::fs::read(image_path)?; | ||
Self::tensor_from_jpeg_bytes(jpeg_bytes) | ||
} | ||
|
||
/// Construct a tensor from the contents of a JPEG file. | ||
/// | ||
/// Requires the `image` feature. | ||
pub fn tensor_from_jpeg_bytes(jpeg_bytes: Vec<u8>) -> Result<Self, ImageError> { | ||
pub fn tensor_from_jpeg_bytes(jpeg_bytes: Vec<u8>) -> Result<Self, TensorImageError> { | ||
use image::ImageDecoder as _; | ||
let jpeg = image::codecs::jpeg::JpegDecoder::new(std::io::Cursor::new(&jpeg_bytes))?; | ||
if jpeg.color_type() != image::ColorType::Rgb8 { | ||
// TODO(emilk): support gray-scale jpeg as well | ||
return Err(ImageError::UnsupportedJpegColorType(jpeg.color_type())); | ||
return Err(TensorImageError::UnsupportedJpegColorType( | ||
jpeg.color_type(), | ||
)); | ||
} | ||
let (w, h) = jpeg.dimensions(); | ||
|
||
|
@@ -599,7 +619,7 @@ impl Tensor { | |
TensorDimension::width(w as _), | ||
TensorDimension::depth(3), | ||
], | ||
data: TensorData::JPEG(jpeg_bytes), | ||
data: TensorData::JPEG(jpeg_bytes.into()), | ||
meaning: TensorDataMeaning::Unknown, | ||
meter: None, | ||
}) | ||
|
@@ -608,20 +628,20 @@ impl Tensor { | |
/// Construct a tensor from something that can be turned into a [`image::DynamicImage`]. | ||
/// | ||
/// Requires the `image` feature. | ||
pub fn from_image(image: impl Into<image::DynamicImage>) -> Result<Self, ImageError> { | ||
pub fn from_image(image: impl Into<image::DynamicImage>) -> Result<Self, TensorImageError> { | ||
Self::from_dynamic_image(image.into()) | ||
} | ||
|
||
/// Construct a tensor from [`image::DynamicImage`]. | ||
/// | ||
/// Requires the `image` feature. | ||
pub fn from_dynamic_image(image: image::DynamicImage) -> Result<Self, ImageError> { | ||
pub fn from_dynamic_image(image: image::DynamicImage) -> Result<Self, TensorImageError> { | ||
let (w, h) = (image.width(), image.height()); | ||
|
||
let (depth, data) = match image { | ||
image::DynamicImage::ImageLuma8(image) => (1, TensorData::U8(image.into_raw())), | ||
image::DynamicImage::ImageRgb8(image) => (3, TensorData::U8(image.into_raw())), | ||
image::DynamicImage::ImageRgba8(image) => (4, TensorData::U8(image.into_raw())), | ||
image::DynamicImage::ImageLuma8(image) => (1, TensorData::U8(image.into_raw().into())), | ||
image::DynamicImage::ImageRgb8(image) => (3, TensorData::U8(image.into_raw().into())), | ||
image::DynamicImage::ImageRgba8(image) => (4, TensorData::U8(image.into_raw().into())), | ||
image::DynamicImage::ImageLuma16(image) => { | ||
(1, TensorData::U16(image.into_raw().into())) | ||
} | ||
|
@@ -649,7 +669,7 @@ impl Tensor { | |
} | ||
_ => { | ||
// It is very annoying that DynamicImage is #[non_exhaustive] | ||
return Err(ImageError::UnsupportedImageColorType(image.color())); | ||
return Err(TensorImageError::UnsupportedImageColorType(image.color())); | ||
} | ||
}; | ||
|
||
|
@@ -741,7 +761,7 @@ fn test_concat_and_slice() { | |
size: 4, | ||
name: None, | ||
}], | ||
data: TensorData::JPEG(vec![1, 2, 3, 4]), | ||
data: TensorData::JPEG(vec![1, 2, 3, 4].into()), | ||
meaning: TensorDataMeaning::Unknown, | ||
meter: Some(1000.0), | ||
}]; | ||
|
@@ -752,7 +772,7 @@ fn test_concat_and_slice() { | |
size: 4, | ||
name: None, | ||
}], | ||
data: TensorData::JPEG(vec![5, 6, 7, 8]), | ||
data: TensorData::JPEG(vec![5, 6, 7, 8].into()), | ||
meaning: TensorDataMeaning::Unknown, | ||
meter: None, | ||
}]; | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,4 +1,5 @@ | ||||||
mod mesh_cache; | ||||||
mod tensor_decode_cache; | ||||||
mod tensor_image_cache; | ||||||
|
||||||
use re_log_types::component_types::{self, TensorTrait}; | ||||||
|
@@ -9,6 +10,7 @@ pub use tensor_image_cache::{AsDynamicImage, TensorImageView}; | |||||
pub struct Caches { | ||||||
/// For displaying images efficiently in immediate mode. | ||||||
pub image: tensor_image_cache::ImageCache, | ||||||
pub decode: tensor_decode_cache::DecodeCache, | ||||||
|
||||||
/// For displaying meshes efficiently in immediate mode. | ||||||
pub mesh: mesh_cache::MeshCache, | ||||||
|
@@ -18,18 +20,22 @@ pub struct Caches { | |||||
|
||||||
impl Caches { | ||||||
/// Call once per frame to potentially flush the cache(s). | ||||||
pub fn new_frame(&mut self) { | ||||||
pub fn begin_frame(&mut self) { | ||||||
let max_image_cache_use = 1_000_000_000; | ||||||
self.image.new_frame(max_image_cache_use); | ||||||
let max_decode_cache_use = 1_000_000_000; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since the decode cache is RAM-only, we could make it quite a bit bigger. Maybe 8 GB?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but Web!! If we go there, this needs to have a different limit on Web There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch - yeah, 8GB is quite a high limit for a 4iGB system :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 8 still seems high in general. Splitting the difference and going with 4 normally and keeping it as 1 for wasm. |
||||||
self.image.begin_frame(max_image_cache_use); | ||||||
self.decode.begin_frame(max_decode_cache_use); | ||||||
} | ||||||
|
||||||
pub fn purge_memory(&mut self) { | ||||||
let Self { | ||||||
image, | ||||||
decode, | ||||||
tensor_stats, | ||||||
mesh: _, // TODO(emilk) | ||||||
} = self; | ||||||
image.purge_memory(); | ||||||
decode.purge_memory(); | ||||||
tensor_stats.clear(); | ||||||
} | ||||||
|
||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't
ByteBuffer
a more fititng name name? 😬There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Arrow calls it a
BinaryArray
so this is the Buffer that a binary array deserializes into.