Skip to content

Commit

Permalink
Adds tool calling output parser (#3232)
Browse files Browse the repository at this point in the history
  • Loading branch information
jacoblee93 authored Nov 11, 2023
1 parent 0cb640f commit 3289345
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 6 deletions.
21 changes: 18 additions & 3 deletions docs/docs/use_cases/extraction.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,22 @@ Most APIs and databases still deal with structured information. Therefore, in or
This work is extremely related to output parsing. Output parsers are responsible for instructing the LLM to respond in a specific format. In this case, the output parsers specify the format of the data you would like to extract from the document.
Then, in addition to the output format instructions, the prompt should also contain the data you would like to extract information from.

You can also try out the [extraction chain](../modules/chains/popular/structured_output), an LLMChain specialized to use OpenAI functions to generate output
matching an input schema.

While normal output parsers are good enough for basic structuring of response data, when doing extraction you often want to extract more complicated or nested structures.

## With tool/function calling

Tool/function calling is a powerful way to perform extraction. At a high level, function calling encourages the model to respond in a structured format.
By specifying one or more JSON schemas that you want the LLM to use, you can guide the LLM to "fill in the blanks" and populate proper values for the keys to the JSON.

Here's a concrete example using OpenAI's tool calling features. Note that this requires either the `gpt-3.5-turbo-1106` or `gpt-4-1106-preview` models.

We'll use [Zod](https://zod.dev/), a popular open source validation library, to define the schema that we'll pass to the model in OpenAI's tool format:

```bash npm2yarn
npm install zod zod-to-json-schema
```

import CodeBlock from "@theme/CodeBlock";
import ToolCalling from "@examples/extraction/openai_tool_calling_extraction.ts";

<CodeBlock language="typescript">{ToolCalling}</CodeBlock>
51 changes: 51 additions & 0 deletions examples/src/extraction/openai_tool_calling_extraction.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";
import { ChatPromptTemplate } from "langchain/prompts";
import { ChatOpenAI } from "langchain/chat_models/openai";
import { JsonOutputToolsParser } from "langchain/output_parsers";

// Zod schema describing the entity we want the model to extract.
// NOTE(review): `age` is modeled as a string here — confirm that is intentional.
const personSchema = z.object({
  name: z.string().describe("The person's name"),
  age: z.string().describe("The person's age"),
});

// System instructions telling the model to act as an extractor.
const EXTRACTION_TEMPLATE = `Extract and save the relevant entities mentioned in the following passage together with their properties.
If a property is not present and is not required in the function parameters, do not include it in the output.`;

const prompt = ChatPromptTemplate.fromMessages([
  ["system", EXTRACTION_TEMPLATE],
  ["human", "{input}"],
]);

// Bind the schema as an OpenAI tool so the model responds with structured
// tool calls. Requires a model that supports tool calling.
const modelWithTools = new ChatOpenAI({
  modelName: "gpt-3.5-turbo-1106",
  temperature: 0,
}).bind({
  tools: [
    {
      type: "function",
      function: {
        name: "person",
        description: "A person",
        parameters: zodToJsonSchema(personSchema),
      },
    },
  ],
});

// Prompt -> model -> parser: the parser converts raw tool calls into
// plain { name, arguments } objects.
const extractionChain = prompt
  .pipe(modelWithTools)
  .pipe(new JsonOutputToolsParser());

const extractionResult = await extractionChain.invoke({
  input: "jane is 2 and bob is 3",
});

console.log(extractionResult);
/*
  [
    { name: 'person', arguments: { name: 'jane', age: '2' } },
    { name: 'person', arguments: { name: 'bob', age: '3' } }
  ]
*/
4 changes: 4 additions & 0 deletions langchain/src/output_parsers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ export {
JsonOutputFunctionsParser,
JsonKeyOutputFunctionsParser,
} from "../output_parsers/openai_functions.js";
export {
type ParsedToolCall,
JsonOutputToolsParser,
} from "../output_parsers/openai_tools.js";
6 changes: 3 additions & 3 deletions langchain/src/output_parsers/openai_functions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export class OutputFunctionsParser extends BaseLLMOutputParser<string> {
return "OutputFunctionsParser";
}

lc_namespace = ["langchain", "chains", "openai_functions"];
lc_namespace = ["langchain", "output_parsers"];

lc_serializable = true;

Expand Down Expand Up @@ -84,7 +84,7 @@ export class JsonOutputFunctionsParser extends BaseCumulativeTransformOutputPars
return "JsonOutputFunctionsParser";
}

lc_namespace = ["langchain", "chains", "openai_functions"];
lc_namespace = ["langchain", "output_parsers"];

lc_serializable = true;

Expand Down Expand Up @@ -179,7 +179,7 @@ export class JsonKeyOutputFunctionsParser<
return "JsonKeyOutputFunctionsParser";
}

lc_namespace = ["langchain", "chains", "openai_functions"];
lc_namespace = ["langchain", "output_parsers"];

lc_serializable = true;

Expand Down
51 changes: 51 additions & 0 deletions langchain/src/output_parsers/openai_tools.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { BaseLLMOutputParser } from "../schema/output_parser.js";
import type { ChatGeneration } from "../schema/index.js";

export type ParsedToolCall = {
  name: string;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  arguments: Record<string, any>;
};

/**
 * Output parser that reads OpenAI tool calls from a chat generation's
 * `additional_kwargs.tool_calls` field and returns them as plain
 * `{ name, arguments }` objects with the JSON arguments deserialized.
 */
export class JsonOutputToolsParser extends BaseLLMOutputParser<
  ParsedToolCall[]
> {
  static lc_name() {
    return "JsonOutputToolsParser";
  }

  lc_namespace = ["langchain", "output_parsers"];

  lc_serializable = true;

  /**
   * Parses the tool calls from the first generation's message.
   * Entries without a `function` property are skipped.
   * @param generations The output of the LLM to parse; only the first generation is inspected.
   * @returns One `{ name, arguments }` object per function-type tool call.
   * @throws Error if the message contains no `tool_calls`.
   */
  async parseResult(generations: ChatGeneration[]): Promise<ParsedToolCall[]> {
    // Optional chaining so an empty generations array throws the descriptive
    // error below rather than a bare TypeError.
    const toolCalls = generations[0]?.message.additional_kwargs.tool_calls;
    if (!toolCalls) {
      throw new Error(
        `No tool_calls in message ${JSON.stringify(generations)}`
      );
    }
    // Deep-copy so parsing never mutates the message held by the caller.
    const clonedToolCalls = JSON.parse(JSON.stringify(toolCalls));
    const parsedToolCalls: ParsedToolCall[] = [];
    for (const toolCall of clonedToolCalls) {
      if (toolCall.function !== undefined) {
        const functionArgs = toolCall.function.arguments;
        parsedToolCalls.push({
          name: toolCall.function.name,
          // `arguments` arrives as a JSON string from the OpenAI API.
          arguments: JSON.parse(functionArgs),
        });
      }
    }
    return parsedToolCalls;
  }
}
45 changes: 45 additions & 0 deletions langchain/src/output_parsers/tests/openai_tools.int.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/* eslint-disable @typescript-eslint/no-explicit-any */

import { expect, test } from "@jest/globals";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";

import { ChatOpenAI } from "../../chat_models/openai.js";
import { ChatPromptTemplate } from "../../prompts/index.js";
import { JsonOutputToolsParser } from "../openai_tools.js";

// Schema for a single joke; the model should emit one tool call per joke.
const jokeSchema = z.object({
  setup: z.string().describe("The setup for the joke"),
  punchline: z.string().describe("The punchline to the joke"),
});

test("Extraction", async () => {
  const jokePrompt = ChatPromptTemplate.fromTemplate(
    `tell me two jokes about {foo}`
  );

  // Bind the joke schema as an OpenAI tool on a tool-calling-capable model.
  const modelWithTools = new ChatOpenAI({
    modelName: "gpt-3.5-turbo-1106",
    temperature: 0,
  }).bind({
    tools: [
      {
        type: "function",
        function: {
          name: "joke",
          description: "A joke",
          parameters: zodToJsonSchema(jokeSchema),
        },
      },
    ],
  });

  const extractionChain = jokePrompt
    .pipe(modelWithTools)
    .pipe(new JsonOutputToolsParser());

  const parsedToolCalls = await extractionChain.invoke({
    foo: "bears",
  });

  console.log(parsedToolCalls);
  // Asking for two jokes should yield exactly two parsed tool calls.
  expect(parsedToolCalls.length).toBe(2);
});

0 comments on commit 3289345

Please sign in to comment.