Skip to content

Commit b288852

Browse files
authored
Feature/Custom Retriever (#3119)
add custom retriever
1 parent 2e45851 commit b288852

File tree

2 files changed

+160
-0
lines changed

2 files changed

+160
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import { get } from 'lodash'
2+
import { Document } from '@langchain/core/documents'
3+
import { VectorStore, VectorStoreRetriever, VectorStoreRetrieverInput } from '@langchain/core/vectorstores'
4+
import { INode, INodeData, INodeParams, INodeOutputsValue } from '../../../src/Interface'
5+
import { handleEscapeCharacters } from '../../../src'
6+
7+
// Default template for the 'Result Format' input: `{{context}}` is replaced with a document's
// pageContent and `{{metadata.source}}` with the `source` entry of its metadata.
const defaultReturnFormat = '{{context}}\nSource: {{metadata.source}}'
8+
9+
/**
 * Node definition for a retriever that returns vector store results rendered
 * through a user-supplied template.
 *
 * The constructor only declares the node's descriptive fields, inputs and
 * outputs; `init` builds the retriever and returns whichever output was selected.
 */
class CustomRetriever_Retrievers implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    outputs: INodeOutputsValue[]

    constructor() {
        this.label = 'Custom Retriever'
        this.name = 'customRetriever'
        this.version = 1.0
        this.type = 'CustomRetriever'
        this.icon = 'customRetriever.svg'
        this.category = 'Retrievers'
        this.description = 'Return results based on predefined format'
        this.baseClasses = [this.type, 'BaseRetriever']
        this.inputs = [
            {
                label: 'Vector Store',
                name: 'vectorStore',
                type: 'VectorStore'
            },
            {
                label: 'Query',
                name: 'query',
                type: 'string',
                description: 'Query to retrieve documents from retriever. If not specified, user question will be used',
                optional: true,
                acceptVariable: true
            },
            {
                label: 'Result Format',
                name: 'resultFormat',
                type: 'string',
                rows: 4,
                description:
                    'Format to return the results in. Use {{context}} to insert the pageContent of the document and {{metadata.key}} to insert metadata values.',
                default: defaultReturnFormat
            },
            {
                label: 'Top K',
                name: 'topK',
                description: 'Number of top results to fetch. Default to vector store topK',
                placeholder: '4',
                type: 'number',
                additionalParams: true,
                optional: true
            }
        ]
        this.outputs = [
            {
                label: 'Custom Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            },
            {
                label: 'Document',
                name: 'document',
                description: 'Array of document objects containing metadata and pageContent',
                baseClasses: ['Document', 'json']
            },
            {
                label: 'Text',
                name: 'text',
                description: 'Concatenated string from pageContent of documents',
                baseClasses: ['string', 'json']
            }
        ]
    }

    /**
     * Builds the retriever from the node inputs and returns the selected output.
     *
     * @param nodeData - Node configuration; reads `inputs.vectorStore`, `inputs.query`,
     * `inputs.topK`, `inputs.resultFormat` and `outputs.output`.
     * @param input - Text used as the search query when no explicit `query` input is set.
     * @returns The retriever itself for the `retriever` output (and for any unrecognised
     * output name), the formatted documents for `document`, or for `text` the documents'
     * pageContent joined with newlines and passed through `handleEscapeCharacters`.
     */
    async init(nodeData: INodeData, input: string): Promise<any> {
        const vectorStore = nodeData.inputs?.vectorStore as VectorStore
        const query = nodeData.inputs?.query as string
        const topK = nodeData.inputs?.topK as string
        const resultFormat = nodeData.inputs?.resultFormat as string

        const output = nodeData.outputs?.output as string

        // A non-numeric or non-positive Top K would otherwise reach the vector store as
        // NaN/0, so anything that is not a positive integer uses the fallback instead.
        const requestedTopK = topK ? parseInt(String(topK), 10) : NaN
        const fallbackTopK: number = (vectorStore as any)?.k ?? 4

        const retriever = CustomRetriever.fromVectorStore(vectorStore, {
            // `||` is deliberate: a missing or blank template falls back to the default
            // rather than reaching the retriever as undefined or producing empty documents.
            resultFormat: resultFormat || defaultReturnFormat,
            topK: Number.isInteger(requestedTopK) && requestedTopK > 0 ? requestedTopK : fallbackTopK
        })

        if (output === 'document' || output === 'text') {
            const docs = await retriever.getRelevantDocuments(query ? query : input)
            if (output === 'document') return docs

            const finaltext = docs.map((doc) => `${doc.pageContent}\n`).join('')
            return handleEscapeCharacters(finaltext, false)
        }

        return retriever
    }
}
112+
113+
/**
 * Constructor options for `CustomRetriever`: the vector store retriever options
 * without `k`, plus the two formatting-specific fields below.
 */
type RetrieverInput<V extends VectorStore> = Omit<VectorStoreRetrieverInput<V>, 'k'> & {
    /** Template applied to every retrieved document. */
    resultFormat?: string
    /** Number of results requested from the vector store. */
    topK?: number
}
117+
118+
/**
 * Vector store retriever whose documents carry their content rendered through
 * a template instead of the raw pageContent.
 */
class CustomRetriever<V extends VectorStore> extends VectorStoreRetriever<V> {
    /** Template containing `{{context}}` and `{{metadata.<path>}}` placeholders. */
    resultFormat: string
    /** Number of results requested from the vector store. */
    topK = 4

    /**
     * @param input - Retriever options; `topK` defaults to 4 and `resultFormat`
     * defaults to `defaultReturnFormat` when omitted.
     */
    constructor(input: RetrieverInput<V>) {
        super(input)
        this.topK = input.topK ?? this.topK
        // Falling back to `this.resultFormat` would read a still-unassigned field and leave
        // the template undefined, crashing the first search; use the default template instead.
        this.resultFormat = input.resultFormat ?? defaultReturnFormat
    }

    /**
     * Runs a similarity search and rewrites each hit's content through the template.
     *
     * @param query - Text to search the vector store for.
     * @returns One document per hit with templated `pageContent` and the hit's original metadata.
     */
    async getRelevantDocuments(query: string): Promise<Document[]> {
        const results = await this.vectorStore.similaritySearchWithScore(query, this.topK, this.filter)

        return results.map(([doc]) => {
            // A replacer function inserts the content verbatim; a plain string replacement
            // would interpret `$&`, `$1`, `$$`, ... inside the document text as patterns.
            const withContext = this.resultFormat.replace(/{{context}}/g, () => doc.pageContent)
            return new Document({
                pageContent: replaceMetadata(withContext, doc.metadata),
                metadata: doc.metadata
            })
        })
    }

    /**
     * Creates a retriever bound to `vectorStore` using the remaining options.
     */
    static fromVectorStore<V extends VectorStore>(vectorStore: V, options: Omit<RetrieverInput<V>, 'vectorStore'>) {
        return new this<V>({ ...options, vectorStore })
    }
}
149+
150+
/**
 * Renders a metadata value as text for insertion into a template.
 *
 * Values whose default string form would contain `[object Object]` (plain objects,
 * arrays holding objects) are rendered as JSON; everything else keeps its
 * `String(value)` form.
 */
function formatMetadataValue(value: unknown): string {
    const text = String(value)
    if (typeof value !== 'object' || value === null || !text.includes('[object Object]')) return text
    try {
        return JSON.stringify(value) ?? text
    } catch {
        // JSON.stringify throws on e.g. circular structures; keep the plain string form.
        return text
    }
}

/**
 * Substitutes every `{{metadata.<path>}}` placeholder in `template` with the value
 * found at `<path>` in `metadata` (dot-separated paths reach nested values).
 *
 * @param template - Text containing zero or more metadata placeholders.
 * @param metadata - Object the placeholder paths are resolved against.
 * @returns The template with resolvable placeholders replaced; a placeholder whose
 * path resolves to `undefined` is left in the text unchanged.
 */
function replaceMetadata(template: string, metadata: Record<string, any>): string {
    const metadataRegex = /{{metadata\.([\w.]+)}}/g

    return template.replace(metadataRegex, (match: string, path: string) => {
        const value = get(metadata, path)
        return value !== undefined ? formatMetadataValue(value) : match
    })
}
158+
159+
// CommonJS export of the node class under the `nodeClass` key.
// NOTE(review): presumably the key the host's node loader looks up — confirm against the loader.
module.exports = { nodeClass: CustomRetriever_Retrievers }
Loading

0 commit comments

Comments
 (0)