feat (ai/core): add line chunking mode to smoothStream (#4263)

vercel · Jan 4, 2025 · commit a8f3242 · 1 parent 453d139

Showing 5 changed files with 154 additions and 5 deletions.
diff --git a/.changeset/hungry-rivers-grow.md b/.changeset/hungry-rivers-grow.md
@@ -0,0 +1,5 @@
+---
+'ai': patch
+---
+
+feat (ai/core): add line chunking mode to smoothStream
diff --git a/content/docs/07-reference/01-ai-sdk-core/80-smooth-stream.mdx b/content/docs/07-reference/01-ai-sdk-core/80-smooth-stream.mdx
@@ -10,14 +10,15 @@ for the `streamText` `transform` option
 to smooth out text streaming by buffering and releasing complete words (or complete lines, when line chunking is enabled) with configurable delays.
 This creates a more natural reading experience when streaming text responses.
 
-```ts highlight={"6-8"}
+```ts highlight={"6-9"}
 import { smoothStream, streamText } from 'ai';
 
 const result = streamText({
   model,
   prompt,
   experimental_transform: smoothStream({
     delayInMs: 20, // optional: defaults to 10ms
+    chunking: 'line', // optional: defaults to 'word'
   }),
 });
 ```
@@ -39,6 +40,13 @@ const result = streamText({
       description:
         'The delay in milliseconds between outputting each chunk (a word or a line, depending on the chunking mode). Defaults to 10ms. Set to 0 to disable delays.',
     },
+    {
+      name: 'chunking',
+      type: '"word" | "line"',
+      isOptional: true,
+      description:
+        'Controls how the text is chunked for streaming. Use "word" to stream word by word (default), or "line" to stream line by line.',
+    },
   ]}
 />
 

diff --git a/examples/ai-core/src/stream-text/azure-smooth-line.ts b/examples/ai-core/src/stream-text/azure-smooth-line.ts
@@ -0,0 +1,21 @@
+import { azure } from '@ai-sdk/azure';
+import { smoothStream, streamText } from 'ai';
+import 'dotenv/config';
+
+async function main() {
+  const result = streamText({
+    model: azure('gpt-4o'), // use your own deployment
+    prompt: 'Invent a new holiday and describe its traditions.',
+    experimental_transform: smoothStream({ chunking: 'line' }),
+  });
+
+  for await (const textPart of result.textStream) {
+    process.stdout.write(textPart);
+  }
+
+  console.log();
+  console.log('Token usage:', await result.usage);
+  console.log('Finish reason:', await result.finishReason);
+}
+
+main().catch(console.error);
diff --git a/packages/ai/core/generate-text/smooth-stream.test.ts b/packages/ai/core/generate-text/smooth-stream.test.ts
@@ -190,4 +190,114 @@ describe('smoothStream', () => {
       },
     ]);
   });
+
+  it('should split text by lines when using line chunking mode', async () => {
+    const events: any[] = [];
+
+    const stream = convertArrayToReadableStream([
+      {
+        textDelta: 'First line\nSecond line\nThird line with more text\n',
+        type: 'text-delta',
+      },
+      { textDelta: 'Partial line', type: 'text-delta' },
+      { textDelta: ' continues\nFinal line\n', type: 'text-delta' },
+      { type: 'step-finish' },
+      { type: 'finish' },
+    ]).pipeThrough(
+      smoothStream({
+        delayInMs: 10,
+        chunking: 'line',
+        _internal: {
+          delay: () => {
+            events.push('delay');
+            return Promise.resolve();
+          },
+        },
+      })({ tools: {} }),
+    );
+
+    const reader = stream.getReader();
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      events.push(value);
+    }
+
+    expect(events).toEqual([
+      'delay',
+      {
+        textDelta: 'First line\n',
+        type: 'text-delta',
+      },
+      'delay',
+      {
+        textDelta: 'Second line\n',
+        type: 'text-delta',
+      },
+      'delay',
+      {
+        textDelta: 'Third line with more text\n',
+        type: 'text-delta',
+      },
+      'delay',
+      {
+        textDelta: 'Partial line continues\n',
+        type: 'text-delta',
+      },
+      'delay',
+      {
+        textDelta: 'Final line\n',
+        type: 'text-delta',
+      },
+      {
+        type: 'step-finish',
+      },
+      {
+        type: 'finish',
+      },
+    ]);
+  });
+
+  it('should handle text without line endings in line chunking mode', async () => {
+    const events: any[] = [];
+
+    const stream = convertArrayToReadableStream([
+      { textDelta: 'Text without', type: 'text-delta' },
+      { textDelta: ' any line', type: 'text-delta' },
+      { textDelta: ' breaks', type: 'text-delta' },
+      { type: 'step-finish' },
+      { type: 'finish' },
+    ]).pipeThrough(
+      smoothStream({
+        delayInMs: 10,
+        chunking: 'line',
+        _internal: {
+          delay: () => {
+            events.push('delay');
+            return Promise.resolve();
+          },
+        },
+      })({ tools: {} }),
+    );
+
+    const reader = stream.getReader();
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      events.push(value);
+    }
+
+    expect(events).toEqual([
+      {
+        textDelta: 'Text without any line breaks',
+        type: 'text-delta',
+      },
+      {
+        type: 'step-finish',
+      },
+      {
+        type: 'finish',
+      },
+    ]);
+  });
 });
diff --git a/packages/ai/core/generate-text/smooth-stream.ts b/packages/ai/core/generate-text/smooth-stream.ts
@@ -6,14 +6,17 @@ import { TextStreamPart } from './stream-text-result';
  * Smooths text streaming output.
  *
  * @param delayInMs - The delay in milliseconds between each chunk. Defaults to 10ms.
+ * @param chunking - Controls how the text is chunked for streaming. Use "word" to stream word by word (default), or "line" to stream line by line.
+ *
  * @returns A transform stream that smooths text streaming output.
  */
 export function smoothStream<TOOLS extends Record<string, CoreTool>>({
   delayInMs = 10,
+  chunking = 'word',
   _internal: { delay = originalDelay } = {},
 }: {
   delayInMs?: number;
-
+  chunking?: 'word' | 'line';
   /**
    * Internal. For test use only. May change without notice.
    */
@@ -45,9 +48,11 @@ export function smoothStream<TOOLS extends Record<string, CoreTool>>({
 
         buffer += chunk.textDelta;
 
-        // Stream out complete words including their optional leading
-        // and required trailing whitespace sequences
-        const regexp = /\s*\S+\s+/m;
+        const regexp =
+          chunking === 'line'
+            ? /[^\n]*\n/m // Match full lines ending with newline
+            : /\s*\S+\s+/m; // Match words with whitespace
+
         while (regexp.test(buffer)) {
           const chunk = buffer.match(regexp)![0];
           controller.enqueue({ type: 'text-delta', textDelta: chunk });