ai-dynamo · ryanolson · Jul 14, 2025 · Jul 14, 2025 · Jul 14, 2025 · Jul 14, 2025
diff --git a/.gitattributes b/.gitattributes
@@ -7,7 +7,7 @@
 *.[Pp][Nn][Gg]      binary
 *.[Zz][Ii][Pp]      binary
 *.[Tt][Gg][Zz]      binary
-
 # Exclude test data files from linguist language detection
 lib/llm/tests/data/** linguist-vendored
 lib/llm/tests/snapshots/** linguist-vendored
+lib/llm/tests/data/replays/deepseek-r1-distill-llama-8b/tokenizer-deepseek-r1-distill-llama-8b.json.bz2 filter=lfs diff=lfs merge=lfs -text
diff --git a/Cargo.lock b/Cargo.lock
@@ -38,6 +38,7 @@ async-stream = { version = "0.3" }
 async-trait = { version = "0.1" }
 async_zmq = { version = "0.4.0" }
 blake3 = { version = "1" }
+bzip2 = { version = "0.6" }  # Purpose: Pure Rust bzip2 compression/decompression
 bytes = { version = "1" }
 chrono = { version = "0.4", default-features = false, features = ["alloc", "std", "clock", "now", "serde"] }
 derive_builder = { version = "0.20" }

diff --git a/deny.toml b/deny.toml
@@ -32,7 +32,8 @@ allow = [
     "BSL-1.0",
     "MPL-2.0",
     "CDLA-Permissive-2.0",
-    "Zlib"
+    "Zlib",
+    "bzip2-1.0.6"
 ]
 
 # TODO exceptions
@@ -43,6 +44,7 @@ allow = [
 # MIT: https://github.com/guidance-ai/llguidance/toktrie_hf_tokenizers
 #  "toktrie_hf_tokenizers",
 
+
 [[licenses.clarify]]
 
 name = "ring"
@@ -51,6 +53,7 @@ license-files = [
     { path = "LICENSE", hash = 0xbd0eed23 }
 ]
 
+
 [bans]
 deny = [
 	# Ensure we don't depend on openssl

diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock
@@ -51,6 +51,7 @@ async-stream = { workspace = true }
 async-trait = { workspace = true }
 async-nats = { workspace = true }
 async_zmq = { workspace = true }
+bzip2 = { workspace = true }
 bytes = { workspace = true }
 chrono = { workspace = true }
 derive_builder = {workspace = true }
@@ -77,7 +78,7 @@ uuid = { workspace = true }
 xxhash-rust = { workspace = true }
 
 akin = "0.4.0"
-blake3 = "1"
+blake3 = { workspace = true }
 bytemuck = "1.22"
 candle-core = { version = "0.8.0" }
 derive-getters = "0.5"

diff --git a/lib/llm/src/lib.rs b/lib/llm/src/lib.rs
@@ -34,6 +34,7 @@ pub mod request_template;
 pub mod tokenizers;
 pub mod tokens;
 pub mod types;
+pub mod utils;
 
 #[cfg(feature = "block-manager")]
 pub mod block_manager;

@@ -7,8 +7,14 @@
 //! during collection, then analyze the recorded data for performance insights.
 
 pub mod logprobs;
+pub mod tokens;
 
+use anyhow::Context as ErrorContext;
+use dynamo_runtime::protocols::annotated::Annotated;
 use futures::Stream;
+use serde::de::DeserializeOwned;
+use serde::Serialize;
+
 use std::pin::Pin;
 use std::task::{Context, Poll};
 use std::time::{Duration, Instant};
@@ -21,6 +27,9 @@ use dynamo_runtime::engine::{
 };
 use std::sync::Arc;
 
+pub use crate::protocols::codec::create_message_stream as parse_sse_stream;
+pub use crate::protocols::convert_sse_stream as convert_sse_to_annotated_stream;
+
 /// Type alias for a receiver of recorded stream data
 pub type RecordedStreamReceiver<R> = oneshot::Receiver<RecordedStream<R>>;
 
@@ -340,6 +349,62 @@ pub fn record_response_stream<R: Data + Clone>(
     record_stream(response_stream, mode)
 }
 
+/// Drain a data stream to completion and return everything it produced.
+///
+/// Only "sink"-style recording is supported: the stream is fully consumed inside the
+/// call and, unlike the other recording functions, no pass-through stream is returned.
+///
+/// # Arguments
+/// * `data_stream` - The stream to drain; it is polled until it yields `None`
+///
+/// # Returns
+/// A `RecordedStream` built from the collected responses (each tagged with a
+/// sequence number counting up from 0) plus the start and end instants of the drain
+///
+/// # Example
+/// ```rust,ignore
+/// use dynamo_llm::perf::record_data_stream;
+///
+/// let stream = create_my_data_stream();
+/// let recorded = record_data_stream(stream).await;
+/// println!("Recorded {} responses", recorded.response_count());
+/// ```
+pub async fn record_data_stream<R: Data + Clone>(
+    mut data_stream: DataStream<R>,
+) -> RecordedStream<R> {
+    use futures::StreamExt;
+
+    // Captured before the first poll so the recording spans the whole drain.
+    let started_at = Instant::now();
+    let mut recorded = Vec::new();
+    let mut next_seq = 0;
+
+    // Wrap each item as it arrives, then advance the counter.
+    while let Some(response) = data_stream.next().await {
+        recorded.push(TimestampedResponse::new(response, next_seq));
+        next_seq += 1;
+    }
+
+    // The end instant is taken only after the stream reports exhaustion.
+    RecordedStream::new(recorded, started_at, Instant::now())
+}
+
+/// Read the file at `path` as SSE text and return a stream of `Annotated<T>` items.
+/// The whole file is read into memory first; the items themselves are not collected here.
+///
+/// # Errors
+/// Returns an error, with `path` attached as context, if the file cannot be read.
+pub fn read_annotated_stream_from_file<T: Serialize + DeserializeOwned>(
+    path: &str,
+) -> Result<DataStream<Annotated<T>>, anyhow::Error> {
+    // Keep the `std::io::Error` as the error source; only add the path as context.
+    let data = std::fs::read_to_string(path)
+        .with_context(|| format!("Failed to read file: {}", path))?;
+    // Split the text into SSE messages, then convert them into the annotated stream.
+    let sse_stream = parse_sse_stream(&data);
+    Ok(convert_sse_to_annotated_stream::<T>(Box::pin(sse_stream)))
+}
+
 #[cfg(test)]
 pub mod tests {
     use super::*;

@@ -568,9 +568,10 @@ mod tests {
     type TestTokenAlternative = (&'static str, f32);
     type TestTokenData = (&'static str, f32, Vec<TestTokenAlternative>);
     type TestTokenDataVec = Vec<TestTokenData>;
-    use crate::perf::{record_stream_with_context, RecordingMode, TimestampedResponse};
-    use crate::protocols::codec::create_message_stream;
-    use crate::protocols::convert_sse_stream;
+    use crate::perf::{
+        read_annotated_stream_from_file, record_stream_with_context, RecordingMode,
+        TimestampedResponse,
+    };
     use approx::assert_abs_diff_eq;
     use async_openai::types::{
         ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionStreamResponseDelta,
@@ -1449,21 +1450,13 @@ mod tests {
 
     #[tokio::test]
     async fn test_real_sse_stream_analysis() {
-        // Read the real SSE data with logprobs
-        let data = std::fs::read_to_string(
+        let stream = read_annotated_stream_from_file::<NvCreateChatCompletionStreamResponse>(
             "tests/data/replays/deepseek-r1-distill-llama-8b/chat-completions.stream.1",
         )
-        .expect("Failed to read test data file");
-
-        // Create stream from SSE data
-        let sse_stream = create_message_stream(&data);
-
-        // Convert SSE messages to our stream response format using the existing converter
-        let response_stream =
-            convert_sse_stream::<NvCreateChatCompletionStreamResponse>(Box::pin(sse_stream));
+        .unwrap();
 
         // Filter out errors and extract successful responses
-        let filtered_stream = response_stream.filter_map(|annotated| async move { annotated.data });
+        let filtered_stream = stream.filter_map(|annotated| async move { annotated.data });
 
         // Create a mock context for recording
         let ctx = Arc::new(MockContext::new());