Merged

Bedrock docs page:

````diff
@@ -8,12 +8,12 @@ from leading AI startups and Amazon available via an API. You can choose from a
 You'll need to install a few official AWS packages as peer dependencies:
 
 ```bash npm2yarn
-npm install @aws-crypto/sha256-js @aws-sdk/credential-provider-node @aws-sdk/protocol-http @aws-sdk/signature-v4
+npm install @aws-crypto/sha256-js @aws-sdk/credential-provider-node @aws-sdk/protocol-http @aws-sdk/signature-v4 @smithy/eventstream-codec @smithy/util-utf8
 ```
 
 ## Usage
 
 import CodeBlock from "@theme/CodeBlock";
 import BedrockExample from "@examples/models/llm/bedrock.ts";
 
-<CodeBlock language="typescript">{BedrockExample}</CodeBlock>
\ No newline at end of file
+<CodeBlock language="typescript">{BedrockExample}</CodeBlock>
````
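
For reference while reviewing, a minimal usage sketch of the class this PR changes (model ID, region, and sampling values are placeholders; assumes `maxTokens` and `temperature` are accepted by the constructor, as the `this.maxTokens`/`this.temperature` reads in the diff below suggest, and that AWS credentials resolve through the default provider chain):

```typescript
import { Bedrock } from "langchain/llms/bedrock";

// Placeholder model and region. The provider ("anthropic" here) is
// inferred from the model ID segment before the first ".".
const model = new Bedrock({
  model: "anthropic.claude-v2",
  region: "us-east-1",
  maxTokens: 200,
  temperature: 0.7,
});

// With this PR, call() drives the invoke-with-response-stream endpoint
// under the hood and joins the streamed chunks into a single string.
const joke = await model.call("Tell me a joke.");
console.log(joke);
```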
langchain/package.json (10 additions & 0 deletions)

```diff
@@ -614,6 +614,8 @@
     "@planetscale/database": "^1.8.0",
     "@qdrant/js-client-rest": "^1.2.0",
     "@raycast/api": "^1.55.2",
+    "@smithy/eventstream-codec": "^2.0.5",
+    "@smithy/util-utf8": "^2.0.0",
     "@supabase/postgrest-js": "^1.1.1",
     "@supabase/supabase-js": "^2.10.0",
     "@tensorflow-models/universal-sentence-encoder": "^1.3.3",
@@ -722,6 +724,8 @@
     "@planetscale/database": "^1.8.0",
     "@qdrant/js-client-rest": "^1.2.0",
     "@raycast/api": "^1.55.2",
+    "@smithy/eventstream-codec": "^2.0.5",
+    "@smithy/util-utf8": "^2.0.0",
     "@supabase/postgrest-js": "^1.1.1",
     "@supabase/supabase-js": "^2.10.0",
     "@tensorflow-models/universal-sentence-encoder": "*",
@@ -848,6 +852,12 @@
     "@raycast/api": {
       "optional": true
     },
+    "@smithy/eventstream-codec": {
+      "optional": true
+    },
+    "@smithy/util-utf8": {
+      "optional": true
+    },
     "@supabase/postgrest-js": {
       "optional": true
     },
```
langchain/src/llms/bedrock.ts (93 additions & 27 deletions)

```diff
@@ -1,10 +1,14 @@
 import { SignatureV4 } from "@aws-sdk/signature-v4";
 import { defaultProvider } from "@aws-sdk/credential-provider-node";
 import { HttpRequest } from "@aws-sdk/protocol-http";
+import { EventStreamCodec } from "@smithy/eventstream-codec";
+import { fromUtf8, toUtf8 } from "@smithy/util-utf8";
 import { Sha256 } from "@aws-crypto/sha256-js";
 import type { AwsCredentialIdentity, Provider } from "@aws-sdk/types";
 import { getEnvironmentVariable } from "../util/env.js";
 import { LLM, BaseLLMParams } from "./base.js";
+import { CallbackManagerForLLMRun } from "../callbacks/manager.js";
+import { GenerationChunk } from "../schema/index.js";
 
 type Dict = { [key: string]: unknown };
 type CredentialType = AwsCredentialIdentity | Provider<AwsCredentialIdentity>;
```

```diff
@@ -20,22 +24,29 @@ class BedrockLLMInputOutputAdapter {
   that LLM model expects. Also, provides a helper function to extract
   the generated text from the model response. */
 
-  static prepareInput(provider: string, prompt: string): Dict {
+  static prepareInput(
+    provider: string,
+    prompt: string,
+    maxTokens = 50,
+    temperature = 0
+  ): Dict {
     const inputBody: Dict = {};
 
-    if (provider === "anthropic" || provider === "ai21") {
+    if (provider === "anthropic") {
       inputBody.prompt = prompt;
+      inputBody.max_tokens_to_sample = maxTokens;
+      inputBody.temperature = temperature;
+    } else if (provider === "ai21") {
+      inputBody.prompt = prompt;
+      inputBody.maxTokens = maxTokens;
+      inputBody.temperature = temperature;
     } else if (provider === "amazon") {
       inputBody.inputText = prompt;
-      inputBody.textGenerationConfig = {};
-    } else {
-      inputBody.inputText = prompt;
-    }
-
-    if (provider === "anthropic" && !("max_tokens_to_sample" in inputBody)) {
-      inputBody.max_tokens_to_sample = 50;
+      inputBody.textGenerationConfig = {
+        maxTokenCount: maxTokens,
+        temperature,
+      };
     }
 
     return inputBody;
   }
```
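
To make the three request formats concrete, a sketch of what the new `prepareInput` returns per provider (the adapter class is module-private, so these calls are illustrative rather than importable; prompt and parameter values are arbitrary):

```typescript
// Anthropic (Claude-style body):
// { prompt: "Hi", max_tokens_to_sample: 100, temperature: 0.5 }
BedrockLLMInputOutputAdapter.prepareInput("anthropic", "Hi", 100, 0.5);

// AI21 (Jurassic-style body):
// { prompt: "Hi", maxTokens: 100, temperature: 0.5 }
BedrockLLMInputOutputAdapter.prepareInput("ai21", "Hi", 100, 0.5);

// Amazon (Titan-style body):
// { inputText: "Hi", textGenerationConfig: { maxTokenCount: 100, temperature: 0.5 } }
BedrockLLMInputOutputAdapter.prepareInput("amazon", "Hi", 100, 0.5);
```

Note that the old catch-all `else` branch goes away, so an unrecognized provider prefix now yields an empty body instead of `{ inputText }`.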

```diff
@@ -50,9 +61,9 @@
     if (provider === "anthropic") {
       return responseBody.completion;
     } else if (provider === "ai21") {
-      return responseBody.completions[0].data.text;
+      return responseBody.data.text;
     }
-    return responseBody.results[0].outputText;
+    return responseBody.outputText;
   }
 }
```

Review thread on the `ai21` branch:

> **Collaborator:** This was wrong before?
>
> **Contributor (author):** This was correct before for the non-streaming Bedrock API. But when I changed to the Bedrock `invoke-with-response-stream` endpoint, these change to what's in this PR currently.
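
To put the exchange above in payload terms: with `invoke-with-response-stream`, `prepareOutput` now receives one already-parsed JSON chunk at a time, so the `completions[0]`/`results[0]` indexing drops away. The per-chunk shapes the new code reads (field names taken from the diff; text values invented):

```typescript
// What each provider's streamed chunk looks like to prepareOutput:
const anthropicChunk = { completion: " Why did" }; // -> responseBody.completion
const ai21Chunk = { data: { text: " Why did" } }; // -> responseBody.data.text
const amazonChunk = { outputText: " Why did" }; // -> responseBody.outputText
```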

```diff
@@ -110,6 +121,8 @@ export class Bedrock extends LLM implements BedrockInput {
 
   fetchFn: typeof fetch;
 
+  codec: EventStreamCodec = new EventStreamCodec(toUtf8, fromUtf8);
+
   get lc_secrets(): { [key: string]: string } | undefined {
     return {};
   }
```

```diff
@@ -152,17 +165,39 @@
     Example:
       response = model.call("Tell me a joke.")
   */
-  async _call(prompt: string): Promise<string> {
+  async _call(
+    prompt: string,
+    options: this["ParsedCallOptions"],
+    runManager?: CallbackManagerForLLMRun
+  ): Promise<string> {
+    const chunks = [];
+    for await (const chunk of this._streamResponseChunks(
+      prompt,
+      options,
+      runManager
+    )) {
+      chunks.push(chunk);
+    }
+    return chunks.map((chunk) => chunk.text).join("");
+  }
+
+  async *_streamResponseChunks(
+    prompt: string,
+    options: this["ParsedCallOptions"],
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<GenerationChunk> {
     const provider = this.model.split(".")[0];
     const service = "bedrock";
 
     const inputBody = BedrockLLMInputOutputAdapter.prepareInput(
       provider,
-      prompt
+      prompt,
+      this.maxTokens,
+      this.temperature
     );
 
     const url = new URL(
-      `https://${service}.${this.region}.amazonaws.com/model/${this.model}/invoke`
+      `https://${service}.${this.region}.amazonaws.com/model/${this.model}/invoke-with-response-stream`
     );
 
     const request = new HttpRequest({
```

```diff
@@ -190,11 +225,15 @@
     const signedRequest = await signer.sign(request);
 
     // Send request to AWS using the low-level fetch API
-    const response = await this.fetchFn(url, {
-      headers: signedRequest.headers,
-      body: signedRequest.body,
-      method: signedRequest.method,
-    });
+    const response = await this.caller.callWithOptions(
+      { signal: options.signal },
+      async () =>
+        this.fetchFn(url, {
+          headers: signedRequest.headers,
+          body: signedRequest.body,
+          method: signedRequest.method,
+        })
+    );
 
     if (response.status < 200 || response.status >= 300) {
       throw Error(
```
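
Routing the fetch through `this.caller.callWithOptions` wires the request into the base class retry handling and makes it abortable. A caller-side sketch, reusing the `model` instance from the first sketch (assumes the standard LangChain `signal` call option is plumbed through, which is what `options.signal` here implies):

```typescript
// Hypothetical 5-second client-side timeout via AbortController.
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), 5000);
try {
  const out = await model.call("Write a haiku about rivers.", {
    signal: controller.signal,
  });
  console.log(out);
} finally {
  clearTimeout(timer);
}
```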

```diff
@@ -204,13 +243,40 @@
       );
     }
 
-    const responseJson = await response.json();
-
-    const text = BedrockLLMInputOutputAdapter.prepareOutput(
-      provider,
-      responseJson
-    );
+    const reader = response.body?.getReader();
+    for await (const chunk of this._readChunks(reader)) {
+      const event = this.codec.decode(chunk);
+      if (
+        event.headers[":event-type"].value !== "chunk" ||
+        event.headers[":content-type"].value !== "application/json"
+      ) {
+        throw Error(`Failed to get event chunk: got ${chunk}`);
+      }
+      const body = JSON.parse(
+        Buffer.from(
+          JSON.parse(new TextDecoder("utf-8").decode(event.body)).bytes,
+          "base64"
+        ).toString()
+      );
+      const text = BedrockLLMInputOutputAdapter.prepareOutput(provider, body);
+      yield new GenerationChunk({
+        text,
+        generationInfo: {},
+      });
+      await runManager?.handleLLMNewToken(text);
+    }
+  }
 
-    return text;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  _readChunks(reader: any) {
+    return {
+      async *[Symbol.asyncIterator]() {
+        let readResult = await reader.read();
+        while (!readResult.done) {
+          yield readResult.value;
+          readResult = await reader.read();
+        }
+      },
+    };
   }
 }
```
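
The chunk loop above unwraps two layers of encoding per event. The same unwrapping as a standalone helper, using only calls that already appear in the diff (the function name is mine, for illustration; like the PR code, it assumes each `read()` yields exactly one complete frame):

```typescript
import { EventStreamCodec } from "@smithy/eventstream-codec";
import { fromUtf8, toUtf8 } from "@smithy/util-utf8";

const codec = new EventStreamCodec(toUtf8, fromUtf8);

// Bedrock wraps each model chunk as:
//   event-stream frame -> UTF-8 JSON envelope { "bytes": <base64> }
//   -> provider-specific JSON payload (e.g. { completion: "..." }).
function decodeBedrockChunk(frame: Uint8Array): unknown {
  const event = codec.decode(frame);
  // Layer 1: the event body is UTF-8 JSON with a base64 "bytes" field.
  const envelope = JSON.parse(new TextDecoder("utf-8").decode(event.body));
  // Layer 2: "bytes" decodes to the model's JSON chunk.
  return JSON.parse(Buffer.from(envelope.bytes, "base64").toString());
}
```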